src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java

Tue, 06 Mar 2012 16:09:35 -0800

author
ohair
date
Tue, 06 Mar 2012 16:09:35 -0800
changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
permissions
-rw-r--r--

7150322: Stop using drop source bundles in jaxws
Reviewed-by: darcy, ohrstrom

     1 /*
     2  * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.dtdparser;
    28 import org.xml.sax.EntityResolver;
    29 import org.xml.sax.InputSource;
    31 import java.io.File;
    32 import java.io.FileInputStream;
    33 import java.io.IOException;
    34 import java.io.InputStream;
    35 import java.net.URL;
    36 import java.net.URLConnection;
    37 import java.util.Hashtable;
    39 /**
    40  * This entity resolver class provides a number of utilities which can help
    41  * managment of external parsed entities in XML.  These are commonly used
    42  * to hold markup declarations that are to be used as part of a Document
    43  * Type Declaration (DTD), or to hold text marked up with XML.
    44  * <p/>
    45  * <P> Features include: <UL>
    46  * <p/>
    47  * <LI> Static factory methods are provided for constructing SAX InputSource
    48  * objects from Files, URLs, or MIME objects.  This eliminates a class of
    49  * error-prone coding in applications.
    50  * <p/>
    51  * <LI> Character encodings for XML documents are correctly supported: <UL>
    52  * <p/>
    53  * <LI> The encodings defined in the RFCs for MIME content types
    54  * (2046 for general MIME, and 2376 for XML in particular), are
    55  * supported, handling <em>charset=...</em> attributes and accepting
    56  * content types which are known to be safe for use with XML;
    57  * <p/>
    58  * <LI> The character encoding autodetection algorithm identified
    59  * in the XML specification is used, and leverages all of
    60  * the JDK 1.1 (and later) character encoding support.
    61  * <p/>
    62  * <LI> The use of MIME typing may optionally be disabled, forcing the
    63  * use of autodetection, to support web servers which don't correctly
    64  * report MIME types for XML.  For example, they may report text that
    65  * is encoded in EUC-JP as being US-ASCII text, leading to fatal
    66  * errors during parsing.
    67  * <p/>
    68  * <LI> The InputSource objects returned by this class always
    69  * have a <code>java.io.Reader</code> available as the "character
    70  * stream" property.
    71  * <p/>
    72  * </UL>
    73  * <p/>
    74  * <LI> Catalog entries can map public identifiers to Java resources or
    75  * to local URLs.  These are used to reduce network dependencies and loads,
    76  * and will often be used for external DTD components.  For example, packages
    77  * shipping DTD files as resources in JAR files can eliminate network traffic
    78  * when accessing them, and sites may provide local caches of common DTDs.
    79  * Note that no particular catalog syntax is supported by this class, only
    80  * the notion of a set of entries.
    81  * <p/>
    82  * </UL>
    83  * <p/>
    84  * <P> Subclasses can perform tasks such as supporting new URI schemes for
    85  * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
    86  * MIME entities which are part of a <em>multipart/related</em> group
    87  * (see RFC 2387).  They may also be used to support particular catalog
    88  * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
    89  * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
    90  * Public Identifiers (FPIs).
    91  *
    92  * @author David Brownell
    93  * @author Janet Koenig
    94  * @version 1.3 00/02/24
    95  */
    96 public class Resolver implements EntityResolver {
    97     private boolean ignoringMIME;
    99     // table mapping public IDs to (local) URIs
   100     private Hashtable id2uri;
   102     // tables mapping public IDs to resources and classloaders
   103     private Hashtable id2resource;
   104     private Hashtable id2loader;
   106     //
   107     // table of MIME content types (less attributes!) known
   108     // to be mostly "OK" to use with XML MIME entities.  the
   109     // idea is to rule out obvious braindamage ("image/jpg")
   110     // not the subtle stuff ("text/html") that might actually
   111     // be (or become) safe.
   112     //
   113     private static final String types [] = {
   114         "application/xml",
   115         "text/xml",
   116         "text/plain",
   117         "text/html", // commonly mis-inferred
   118         "application/x-netcdf", // this is often illegal XML
   119         "content/unknown"
   120     };
   122     /**
   123      * Constructs a resolver.
   124      */
   125     public Resolver() {
   126     }
   128     /**
   129      * Returns an input source, using the MIME type information and URL
   130      * scheme to statically determine the correct character encoding if
   131      * possible and otherwise autodetecting it.  MIME carefully specifies
   132      * the character encoding defaults, and how attributes of the content
   133      * type can change it.  XML further specifies two mandatory encodings
   134      * (UTF-8 and UTF-16), and includes an XML declaration which can be
   135      * used to internally label most documents encoded using US-ASCII
   136      * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
   137      * more).
   138      * <p/>
   139      * <P> This method can be used to access XML documents which do not
   140      * have URIs (such as servlet input streams, or most JavaMail message
   141      * entities) and to support access methods such as HTTP POST or PUT.
   142      * (URLs normally return content using the GET method.)
   143      * <p/>
   144      * <P> <em> The caller should set the system ID in order for relative URIs
   145      * found in this document to be interpreted correctly.</em> In some cases,
   146      * a custom resolver will need to be used; for example, documents
   147      * may be grouped in a single MIME "multipart/related" bundle, and
   148      * relative URLs would refer to other documents in that bundle.
   149      *
   150      * @param contentType The MIME content type for the source for which
   151      *                    an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
   152      * @param stream      The input byte stream for the input source.
   153      * @param checkType   If true, this verifies that the content type is known
   154      *                    to support XML documents, such as <em>application/xml</em>.
   155      * @param scheme      Unless this is "file", unspecified MIME types
   156      *                    default to US-ASCII.  Files are always autodetected since most
   157      *                    file systems discard character encoding information.
   158      */
   159     public static InputSource createInputSource(String contentType,
   160                                                 InputStream stream,
   161                                                 boolean checkType,
   162                                                 String scheme) throws IOException {
   163         InputSource retval;
   164         String charset = null;
   166         if (contentType != null) {
   167             int index;
   169             contentType = contentType.toLowerCase();
   170             index = contentType.indexOf(';');
   171             if (index != -1) {
   172                 String attributes;
   174                 attributes = contentType.substring(index + 1);
   175                 contentType = contentType.substring(0, index);
   177                 // use "charset=..." if it's available
   178                 index = attributes.indexOf("charset");
   179                 if (index != -1) {
   180                     attributes = attributes.substring(index + 7);
   181                     // strip out subsequent attributes
   182                     if ((index = attributes.indexOf(';')) != -1)
   183                         attributes = attributes.substring(0, index);
   184                     // find start of value
   185                     if ((index = attributes.indexOf('=')) != -1) {
   186                         attributes = attributes.substring(index + 1);
   187                         // strip out rfc822 comments
   188                         if ((index = attributes.indexOf('(')) != -1)
   189                             attributes = attributes.substring(0, index);
   190                         // double quotes are optional
   191                         if ((index = attributes.indexOf('"')) != -1) {
   192                             attributes = attributes.substring(index + 1);
   193                             attributes = attributes.substring(0,
   194                                     attributes.indexOf('"'));
   195                         }
   196                         charset = attributes.trim();
   197                         // XXX "\;", "\)" etc were mishandled above
   198                     }
   199                 }
   200             }
   202             //
   203             // Check MIME type.
   204             //
   205             if (checkType) {
   206                 boolean isOK = false;
   207                 for (int i = 0; i < types.length; i++)
   208                     if (types[i].equals(contentType)) {
   209                         isOK = true;
   210                         break;
   211                     }
   212                 if (!isOK)
   213                     throw new IOException("Not XML: " + contentType);
   214             }
   216             //
   217             // "text/*" MIME types have hard-wired character set
   218             // defaults, as specified in the RFCs.  For XML, we
   219             // ignore the system "file.encoding" property since
   220             // autodetection is more correct.
   221             //
   222             if (charset == null) {
   223                 contentType = contentType.trim();
   224                 if (contentType.startsWith("text/")) {
   225                     if (!"file".equalsIgnoreCase(scheme))
   226                         charset = "US-ASCII";
   227                 }
   228                 // "application/*" has no default
   229             }
   230         }
   232         retval = new InputSource(XmlReader.createReader(stream, charset));
   233         retval.setByteStream(stream);
   234         retval.setEncoding(charset);
   235         return retval;
   236     }
   239     /**
   240      * Creates an input source from a given URI.
   241      *
   242      * @param uri       the URI (system ID) for the entity
   243      * @param checkType if true, the MIME content type for the entity
   244      *                  is checked for document type and character set encoding.
   245      */
   246     static public InputSource createInputSource(URL uri, boolean checkType)
   247             throws IOException {
   249         URLConnection conn = uri.openConnection();
   250         InputSource retval;
   252         if (checkType) {
   253             String contentType = conn.getContentType();
   254             retval = createInputSource(contentType, conn.getInputStream(),
   255                     false, uri.getProtocol());
   256         } else {
   257             retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
   258         }
   259         retval.setSystemId(conn.getURL().toString());
   260         return retval;
   261     }
   264     /**
   265      * Creates an input source from a given file, autodetecting
   266      * the character encoding.
   267      */
   268     static public InputSource createInputSource(File file)
   269             throws IOException {
   270         InputSource retval;
   271         String path;
   273         retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
   275         // On JDK 1.2 and later, simplify this:
   276         //    "path = file.toURL ().toString ()".
   277         path = file.getAbsolutePath();
   278         if (File.separatorChar != '/')
   279             path = path.replace(File.separatorChar, '/');
   280         if (!path.startsWith("/"))
   281             path = "/" + path;
   282         if (!path.endsWith("/") && file.isDirectory())
   283             path = path + "/";
   285         retval.setSystemId("file:" + path);
   286         return retval;
   287     }
   290     /**
   291      * <b>SAX:</b>
   292      * Resolve the given entity into an input source.  If the name can't
   293      * be mapped to a preferred form of the entity, the URI is used.  To
   294      * resolve the entity, first a local catalog mapping names to URIs is
   295      * consulted.  If no mapping is found there, a catalog mapping names
   296      * to java resources is consulted.  Finally, if neither mapping found
   297      * a copy of the entity, the specified URI is used.
   298      * <p/>
   299      * <P> When a URI is used, <a href="#createInputSource">
   300      * createInputSource</a> is used to correctly deduce the character
   301      * encoding used by this entity.  No MIME type checking is done.
   302      *
   303      * @param name Used to find alternate copies of the entity, when
   304      *             this value is non-null; this is the XML "public ID".
   305      * @param uri  Used when no alternate copy of the entity is found;
   306      *             this is the XML "system ID", normally a URI.
   307      */
   308     public InputSource resolveEntity(String name, String uri)
   309             throws IOException {
   310         InputSource retval;
   311         String mappedURI = name2uri(name);
   312         InputStream stream;
   314         // prefer explicit URI mappings, then bundled resources...
   315         if (mappedURI == null && (stream = mapResource(name)) != null) {
   316             uri = "java:resource:" + (String) id2resource.get(name);
   317             retval = new InputSource(XmlReader.createReader(stream));
   319             // ...and treat all URIs the same (as URLs for now).
   320         } else {
   321             URL url;
   322             URLConnection conn;
   324             if (mappedURI != null)
   325                 uri = mappedURI;
   326             else if (uri == null)
   327                 return null;
   329             url = new URL(uri);
   330             conn = url.openConnection();
   331             uri = conn.getURL().toString();
   332             // System.out.println ("++ URI: " + url);
   333             if (ignoringMIME)
   334                 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
   335             else {
   336                 String contentType = conn.getContentType();
   337                 retval = createInputSource(contentType,
   338                         conn.getInputStream(),
   339                         false, url.getProtocol());
   340             }
   341         }
   342         retval.setSystemId(uri);
   343         retval.setPublicId(name);
   344         return retval;
   345     }
   348     /**
   349      * Returns true if this resolver is ignoring MIME types in the documents
   350      * it returns, to work around bugs in how servers have reported the
   351      * documents' MIME types.
   352      */
   353     public boolean isIgnoringMIME() {
   354         return ignoringMIME;
   355     }
   357     /**
   358      * Tells the resolver whether to ignore MIME types in the documents it
   359      * retrieves.  Many web servers incorrectly assign text documents a
   360      * default character encoding, even when that is incorrect.  For example,
   361      * all HTTP text documents default to use ISO-8859-1 (used for Western
   362      * European languages), and other MIME sources default text documents
   363      * to use US-ASCII (a seven bit encoding).  For XML documents which
   364      * include text encoding declarations (as most should do), these server
   365      * bugs can be worked around by ignoring the MIME type entirely.
   366      */
   367     public void setIgnoringMIME(boolean value) {
   368         ignoringMIME = value;
   369     }
   372     // maps the public ID to an alternate URI, if one is registered
   373     private String name2uri(String publicId) {
   374         if (publicId == null || id2uri == null)
   375             return null;
   376         return (String) id2uri.get(publicId);
   377     }
   380     /**
   381      * Registers the given public ID as corresponding to a particular
   382      * URI, typically a local copy.  This URI will be used in preference
   383      * to ones provided as system IDs in XML entity declarations.  This
   384      * mechanism would most typically be used for Document Type Definitions
   385      * (DTDs), where the public IDs are formally managed and versioned.
   386      *
   387      * @param publicId The managed public ID being mapped
   388      * @param uri      The URI of the preferred copy of that entity
   389      */
   390     public void registerCatalogEntry(String publicId,
   391                                      String uri) {
   392         if (id2uri == null)
   393             id2uri = new Hashtable(17);
   394         id2uri.put(publicId, uri);
   395     }
   398     // return the resource as a stream
   399     private InputStream mapResource(String publicId) {
   400         // System.out.println ("++ PUBLIC: " + publicId);
   401         if (publicId == null || id2resource == null)
   402             return null;
   404         String resourceName = (String) id2resource.get(publicId);
   405         ClassLoader loader = null;
   407         if (resourceName == null)
   408             return null;
   409         // System.out.println ("++ Resource: " + resourceName);
   411         if (id2loader != null)
   412             loader = (ClassLoader) id2loader.get(publicId);
   413         // System.out.println ("++ Loader: " + loader);
   414         if (loader == null)
   415             return ClassLoader.getSystemResourceAsStream(resourceName);
   416         return loader.getResourceAsStream(resourceName);
   417     }
   419     /**
   420      * Registers a given public ID as corresponding to a particular Java
   421      * resource in a given class loader, typically distributed with a
   422      * software package.  This resource will be preferred over system IDs
   423      * included in XML documents.  This mechanism should most typically be
   424      * used for Document Type Definitions (DTDs), where the public IDs are
   425      * formally managed and versioned.
   426      * <p/>
   427      * <P> If a mapping to a URI has been provided, that mapping takes
   428      * precedence over this one.
   429      *
   430      * @param publicId     The managed public ID being mapped
   431      * @param resourceName The name of the Java resource
   432      * @param loader       The class loader holding the resource, or null if
   433      *                     it is a system resource.
   434      */
   435     public void registerCatalogEntry(String publicId,
   436                                      String resourceName,
   437                                      ClassLoader loader) {
   438         if (id2resource == null)
   439             id2resource = new Hashtable(17);
   440         id2resource.put(publicId, resourceName);
   442         if (loader != null) {
   443             if (id2loader == null)
   444                 id2loader = new Hashtable(17);
   445             id2loader.put(publicId, loader);
   446         }
   447     }
   448 }

mercurial