src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java	Wed Apr 27 01:27:09 2016 +0800
     1.3 @@ -0,0 +1,448 @@
     1.4 +/*
     1.5 + * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package com.sun.xml.internal.dtdparser;
    1.30 +
    1.31 +import org.xml.sax.EntityResolver;
    1.32 +import org.xml.sax.InputSource;
    1.33 +
    1.34 +import java.io.File;
    1.35 +import java.io.FileInputStream;
    1.36 +import java.io.IOException;
    1.37 +import java.io.InputStream;
    1.38 +import java.net.URL;
    1.39 +import java.net.URLConnection;
    1.40 +import java.util.Hashtable;
    1.41 +
    1.42 +/**
    1.43 + * This entity resolver class provides a number of utilities which can help
    1.44 + * managment of external parsed entities in XML.  These are commonly used
    1.45 + * to hold markup declarations that are to be used as part of a Document
    1.46 + * Type Declaration (DTD), or to hold text marked up with XML.
    1.47 + * <p/>
    1.48 + * <P> Features include: <UL>
    1.49 + * <p/>
    1.50 + * <LI> Static factory methods are provided for constructing SAX InputSource
    1.51 + * objects from Files, URLs, or MIME objects.  This eliminates a class of
    1.52 + * error-prone coding in applications.
    1.53 + * <p/>
    1.54 + * <LI> Character encodings for XML documents are correctly supported: <UL>
    1.55 + * <p/>
    1.56 + * <LI> The encodings defined in the RFCs for MIME content types
    1.57 + * (2046 for general MIME, and 2376 for XML in particular), are
    1.58 + * supported, handling <em>charset=...</em> attributes and accepting
    1.59 + * content types which are known to be safe for use with XML;
    1.60 + * <p/>
    1.61 + * <LI> The character encoding autodetection algorithm identified
    1.62 + * in the XML specification is used, and leverages all of
    1.63 + * the JDK 1.1 (and later) character encoding support.
    1.64 + * <p/>
    1.65 + * <LI> The use of MIME typing may optionally be disabled, forcing the
    1.66 + * use of autodetection, to support web servers which don't correctly
    1.67 + * report MIME types for XML.  For example, they may report text that
    1.68 + * is encoded in EUC-JP as being US-ASCII text, leading to fatal
    1.69 + * errors during parsing.
    1.70 + * <p/>
    1.71 + * <LI> The InputSource objects returned by this class always
    1.72 + * have a <code>java.io.Reader</code> available as the "character
    1.73 + * stream" property.
    1.74 + * <p/>
    1.75 + * </UL>
    1.76 + * <p/>
    1.77 + * <LI> Catalog entries can map public identifiers to Java resources or
    1.78 + * to local URLs.  These are used to reduce network dependencies and loads,
    1.79 + * and will often be used for external DTD components.  For example, packages
    1.80 + * shipping DTD files as resources in JAR files can eliminate network traffic
    1.81 + * when accessing them, and sites may provide local caches of common DTDs.
    1.82 + * Note that no particular catalog syntax is supported by this class, only
    1.83 + * the notion of a set of entries.
    1.84 + * <p/>
    1.85 + * </UL>
    1.86 + * <p/>
    1.87 + * <P> Subclasses can perform tasks such as supporting new URI schemes for
    1.88 + * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
    1.89 + * MIME entities which are part of a <em>multipart/related</em> group
    1.90 + * (see RFC 2387).  They may also be used to support particular catalog
    1.91 + * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
    1.92 + * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
    1.93 + * Public Identifiers (FPIs).
    1.94 + *
    1.95 + * @author David Brownell
    1.96 + * @author Janet Koenig
    1.97 + * @version 1.3 00/02/24
    1.98 + */
    1.99 +public class Resolver implements EntityResolver {
   1.100 +    private boolean ignoringMIME;
   1.101 +
   1.102 +    // table mapping public IDs to (local) URIs
   1.103 +    private Hashtable id2uri;
   1.104 +
   1.105 +    // tables mapping public IDs to resources and classloaders
   1.106 +    private Hashtable id2resource;
   1.107 +    private Hashtable id2loader;
   1.108 +
   1.109 +    //
   1.110 +    // table of MIME content types (less attributes!) known
   1.111 +    // to be mostly "OK" to use with XML MIME entities.  the
   1.112 +    // idea is to rule out obvious braindamage ("image/jpg")
   1.113 +    // not the subtle stuff ("text/html") that might actually
   1.114 +    // be (or become) safe.
   1.115 +    //
   1.116 +    private static final String types [] = {
   1.117 +        "application/xml",
   1.118 +        "text/xml",
   1.119 +        "text/plain",
   1.120 +        "text/html", // commonly mis-inferred
   1.121 +        "application/x-netcdf", // this is often illegal XML
   1.122 +        "content/unknown"
   1.123 +    };
   1.124 +
   1.125 +    /**
   1.126 +     * Constructs a resolver.
   1.127 +     */
   1.128 +    public Resolver() {
   1.129 +    }
   1.130 +
   1.131 +    /**
   1.132 +     * Returns an input source, using the MIME type information and URL
   1.133 +     * scheme to statically determine the correct character encoding if
   1.134 +     * possible and otherwise autodetecting it.  MIME carefully specifies
   1.135 +     * the character encoding defaults, and how attributes of the content
   1.136 +     * type can change it.  XML further specifies two mandatory encodings
   1.137 +     * (UTF-8 and UTF-16), and includes an XML declaration which can be
   1.138 +     * used to internally label most documents encoded using US-ASCII
   1.139 +     * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
   1.140 +     * more).
   1.141 +     * <p/>
   1.142 +     * <P> This method can be used to access XML documents which do not
   1.143 +     * have URIs (such as servlet input streams, or most JavaMail message
   1.144 +     * entities) and to support access methods such as HTTP POST or PUT.
   1.145 +     * (URLs normally return content using the GET method.)
   1.146 +     * <p/>
   1.147 +     * <P> <em> The caller should set the system ID in order for relative URIs
   1.148 +     * found in this document to be interpreted correctly.</em> In some cases,
   1.149 +     * a custom resolver will need to be used; for example, documents
   1.150 +     * may be grouped in a single MIME "multipart/related" bundle, and
   1.151 +     * relative URLs would refer to other documents in that bundle.
   1.152 +     *
   1.153 +     * @param contentType The MIME content type for the source for which
   1.154 +     *                    an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
   1.155 +     * @param stream      The input byte stream for the input source.
   1.156 +     * @param checkType   If true, this verifies that the content type is known
   1.157 +     *                    to support XML documents, such as <em>application/xml</em>.
   1.158 +     * @param scheme      Unless this is "file", unspecified MIME types
   1.159 +     *                    default to US-ASCII.  Files are always autodetected since most
   1.160 +     *                    file systems discard character encoding information.
   1.161 +     */
   1.162 +    public static InputSource createInputSource(String contentType,
   1.163 +                                                InputStream stream,
   1.164 +                                                boolean checkType,
   1.165 +                                                String scheme) throws IOException {
   1.166 +        InputSource retval;
   1.167 +        String charset = null;
   1.168 +
   1.169 +        if (contentType != null) {
   1.170 +            int index;
   1.171 +
   1.172 +            contentType = contentType.toLowerCase();
   1.173 +            index = contentType.indexOf(';');
   1.174 +            if (index != -1) {
   1.175 +                String attributes;
   1.176 +
   1.177 +                attributes = contentType.substring(index + 1);
   1.178 +                contentType = contentType.substring(0, index);
   1.179 +
   1.180 +                // use "charset=..." if it's available
   1.181 +                index = attributes.indexOf("charset");
   1.182 +                if (index != -1) {
   1.183 +                    attributes = attributes.substring(index + 7);
   1.184 +                    // strip out subsequent attributes
   1.185 +                    if ((index = attributes.indexOf(';')) != -1)
   1.186 +                        attributes = attributes.substring(0, index);
   1.187 +                    // find start of value
   1.188 +                    if ((index = attributes.indexOf('=')) != -1) {
   1.189 +                        attributes = attributes.substring(index + 1);
   1.190 +                        // strip out rfc822 comments
   1.191 +                        if ((index = attributes.indexOf('(')) != -1)
   1.192 +                            attributes = attributes.substring(0, index);
   1.193 +                        // double quotes are optional
   1.194 +                        if ((index = attributes.indexOf('"')) != -1) {
   1.195 +                            attributes = attributes.substring(index + 1);
   1.196 +                            attributes = attributes.substring(0,
   1.197 +                                    attributes.indexOf('"'));
   1.198 +                        }
   1.199 +                        charset = attributes.trim();
   1.200 +                        // XXX "\;", "\)" etc were mishandled above
   1.201 +                    }
   1.202 +                }
   1.203 +            }
   1.204 +
   1.205 +            //
   1.206 +            // Check MIME type.
   1.207 +            //
   1.208 +            if (checkType) {
   1.209 +                boolean isOK = false;
   1.210 +                for (int i = 0; i < types.length; i++)
   1.211 +                    if (types[i].equals(contentType)) {
   1.212 +                        isOK = true;
   1.213 +                        break;
   1.214 +                    }
   1.215 +                if (!isOK)
   1.216 +                    throw new IOException("Not XML: " + contentType);
   1.217 +            }
   1.218 +
   1.219 +            //
   1.220 +            // "text/*" MIME types have hard-wired character set
   1.221 +            // defaults, as specified in the RFCs.  For XML, we
   1.222 +            // ignore the system "file.encoding" property since
   1.223 +            // autodetection is more correct.
   1.224 +            //
   1.225 +            if (charset == null) {
   1.226 +                contentType = contentType.trim();
   1.227 +                if (contentType.startsWith("text/")) {
   1.228 +                    if (!"file".equalsIgnoreCase(scheme))
   1.229 +                        charset = "US-ASCII";
   1.230 +                }
   1.231 +                // "application/*" has no default
   1.232 +            }
   1.233 +        }
   1.234 +
   1.235 +        retval = new InputSource(XmlReader.createReader(stream, charset));
   1.236 +        retval.setByteStream(stream);
   1.237 +        retval.setEncoding(charset);
   1.238 +        return retval;
   1.239 +    }
   1.240 +
   1.241 +
   1.242 +    /**
   1.243 +     * Creates an input source from a given URI.
   1.244 +     *
   1.245 +     * @param uri       the URI (system ID) for the entity
   1.246 +     * @param checkType if true, the MIME content type for the entity
   1.247 +     *                  is checked for document type and character set encoding.
   1.248 +     */
   1.249 +    static public InputSource createInputSource(URL uri, boolean checkType)
   1.250 +            throws IOException {
   1.251 +
   1.252 +        URLConnection conn = uri.openConnection();
   1.253 +        InputSource retval;
   1.254 +
   1.255 +        if (checkType) {
   1.256 +            String contentType = conn.getContentType();
   1.257 +            retval = createInputSource(contentType, conn.getInputStream(),
   1.258 +                    false, uri.getProtocol());
   1.259 +        } else {
   1.260 +            retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
   1.261 +        }
   1.262 +        retval.setSystemId(conn.getURL().toString());
   1.263 +        return retval;
   1.264 +    }
   1.265 +
   1.266 +
   1.267 +    /**
   1.268 +     * Creates an input source from a given file, autodetecting
   1.269 +     * the character encoding.
   1.270 +     */
   1.271 +    static public InputSource createInputSource(File file)
   1.272 +            throws IOException {
   1.273 +        InputSource retval;
   1.274 +        String path;
   1.275 +
   1.276 +        retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
   1.277 +
   1.278 +        // On JDK 1.2 and later, simplify this:
   1.279 +        //    "path = file.toURL ().toString ()".
   1.280 +        path = file.getAbsolutePath();
   1.281 +        if (File.separatorChar != '/')
   1.282 +            path = path.replace(File.separatorChar, '/');
   1.283 +        if (!path.startsWith("/"))
   1.284 +            path = "/" + path;
   1.285 +        if (!path.endsWith("/") && file.isDirectory())
   1.286 +            path = path + "/";
   1.287 +
   1.288 +        retval.setSystemId("file:" + path);
   1.289 +        return retval;
   1.290 +    }
   1.291 +
   1.292 +
   1.293 +    /**
   1.294 +     * <b>SAX:</b>
   1.295 +     * Resolve the given entity into an input source.  If the name can't
   1.296 +     * be mapped to a preferred form of the entity, the URI is used.  To
   1.297 +     * resolve the entity, first a local catalog mapping names to URIs is
   1.298 +     * consulted.  If no mapping is found there, a catalog mapping names
   1.299 +     * to java resources is consulted.  Finally, if neither mapping found
   1.300 +     * a copy of the entity, the specified URI is used.
   1.301 +     * <p/>
   1.302 +     * <P> When a URI is used, <a href="#createInputSource">
   1.303 +     * createInputSource</a> is used to correctly deduce the character
   1.304 +     * encoding used by this entity.  No MIME type checking is done.
   1.305 +     *
   1.306 +     * @param name Used to find alternate copies of the entity, when
   1.307 +     *             this value is non-null; this is the XML "public ID".
   1.308 +     * @param uri  Used when no alternate copy of the entity is found;
   1.309 +     *             this is the XML "system ID", normally a URI.
   1.310 +     */
   1.311 +    public InputSource resolveEntity(String name, String uri)
   1.312 +            throws IOException {
   1.313 +        InputSource retval;
   1.314 +        String mappedURI = name2uri(name);
   1.315 +        InputStream stream;
   1.316 +
   1.317 +        // prefer explicit URI mappings, then bundled resources...
   1.318 +        if (mappedURI == null && (stream = mapResource(name)) != null) {
   1.319 +            uri = "java:resource:" + (String) id2resource.get(name);
   1.320 +            retval = new InputSource(XmlReader.createReader(stream));
   1.321 +
   1.322 +            // ...and treat all URIs the same (as URLs for now).
   1.323 +        } else {
   1.324 +            URL url;
   1.325 +            URLConnection conn;
   1.326 +
   1.327 +            if (mappedURI != null)
   1.328 +                uri = mappedURI;
   1.329 +            else if (uri == null)
   1.330 +                return null;
   1.331 +
   1.332 +            url = new URL(uri);
   1.333 +            conn = url.openConnection();
   1.334 +            uri = conn.getURL().toString();
   1.335 +            // System.out.println ("++ URI: " + url);
   1.336 +            if (ignoringMIME)
   1.337 +                retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
   1.338 +            else {
   1.339 +                String contentType = conn.getContentType();
   1.340 +                retval = createInputSource(contentType,
   1.341 +                        conn.getInputStream(),
   1.342 +                        false, url.getProtocol());
   1.343 +            }
   1.344 +        }
   1.345 +        retval.setSystemId(uri);
   1.346 +        retval.setPublicId(name);
   1.347 +        return retval;
   1.348 +    }
   1.349 +
   1.350 +
   1.351 +    /**
   1.352 +     * Returns true if this resolver is ignoring MIME types in the documents
   1.353 +     * it returns, to work around bugs in how servers have reported the
   1.354 +     * documents' MIME types.
   1.355 +     */
   1.356 +    public boolean isIgnoringMIME() {
   1.357 +        return ignoringMIME;
   1.358 +    }
   1.359 +
   1.360 +    /**
   1.361 +     * Tells the resolver whether to ignore MIME types in the documents it
   1.362 +     * retrieves.  Many web servers incorrectly assign text documents a
   1.363 +     * default character encoding, even when that is incorrect.  For example,
   1.364 +     * all HTTP text documents default to use ISO-8859-1 (used for Western
   1.365 +     * European languages), and other MIME sources default text documents
   1.366 +     * to use US-ASCII (a seven bit encoding).  For XML documents which
   1.367 +     * include text encoding declarations (as most should do), these server
   1.368 +     * bugs can be worked around by ignoring the MIME type entirely.
   1.369 +     */
   1.370 +    public void setIgnoringMIME(boolean value) {
   1.371 +        ignoringMIME = value;
   1.372 +    }
   1.373 +
   1.374 +
   1.375 +    // maps the public ID to an alternate URI, if one is registered
   1.376 +    private String name2uri(String publicId) {
   1.377 +        if (publicId == null || id2uri == null)
   1.378 +            return null;
   1.379 +        return (String) id2uri.get(publicId);
   1.380 +    }
   1.381 +
   1.382 +
   1.383 +    /**
   1.384 +     * Registers the given public ID as corresponding to a particular
   1.385 +     * URI, typically a local copy.  This URI will be used in preference
   1.386 +     * to ones provided as system IDs in XML entity declarations.  This
   1.387 +     * mechanism would most typically be used for Document Type Definitions
   1.388 +     * (DTDs), where the public IDs are formally managed and versioned.
   1.389 +     *
   1.390 +     * @param publicId The managed public ID being mapped
   1.391 +     * @param uri      The URI of the preferred copy of that entity
   1.392 +     */
   1.393 +    public void registerCatalogEntry(String publicId,
   1.394 +                                     String uri) {
   1.395 +        if (id2uri == null)
   1.396 +            id2uri = new Hashtable(17);
   1.397 +        id2uri.put(publicId, uri);
   1.398 +    }
   1.399 +
   1.400 +
   1.401 +    // return the resource as a stream
   1.402 +    private InputStream mapResource(String publicId) {
   1.403 +        // System.out.println ("++ PUBLIC: " + publicId);
   1.404 +        if (publicId == null || id2resource == null)
   1.405 +            return null;
   1.406 +
   1.407 +        String resourceName = (String) id2resource.get(publicId);
   1.408 +        ClassLoader loader = null;
   1.409 +
   1.410 +        if (resourceName == null)
   1.411 +            return null;
   1.412 +        // System.out.println ("++ Resource: " + resourceName);
   1.413 +
   1.414 +        if (id2loader != null)
   1.415 +            loader = (ClassLoader) id2loader.get(publicId);
   1.416 +        // System.out.println ("++ Loader: " + loader);
   1.417 +        if (loader == null)
   1.418 +            return ClassLoader.getSystemResourceAsStream(resourceName);
   1.419 +        return loader.getResourceAsStream(resourceName);
   1.420 +    }
   1.421 +
   1.422 +    /**
   1.423 +     * Registers a given public ID as corresponding to a particular Java
   1.424 +     * resource in a given class loader, typically distributed with a
   1.425 +     * software package.  This resource will be preferred over system IDs
   1.426 +     * included in XML documents.  This mechanism should most typically be
   1.427 +     * used for Document Type Definitions (DTDs), where the public IDs are
   1.428 +     * formally managed and versioned.
   1.429 +     * <p/>
   1.430 +     * <P> If a mapping to a URI has been provided, that mapping takes
   1.431 +     * precedence over this one.
   1.432 +     *
   1.433 +     * @param publicId     The managed public ID being mapped
   1.434 +     * @param resourceName The name of the Java resource
   1.435 +     * @param loader       The class loader holding the resource, or null if
   1.436 +     *                     it is a system resource.
   1.437 +     */
   1.438 +    public void registerCatalogEntry(String publicId,
   1.439 +                                     String resourceName,
   1.440 +                                     ClassLoader loader) {
   1.441 +        if (id2resource == null)
   1.442 +            id2resource = new Hashtable(17);
   1.443 +        id2resource.put(publicId, resourceName);
   1.444 +
   1.445 +        if (loader != null) {
   1.446 +            if (id2loader == null)
   1.447 +                id2loader = new Hashtable(17);
   1.448 +            id2loader.put(publicId, loader);
   1.449 +        }
   1.450 +    }
   1.451 +}

mercurial