src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java

Thu, 31 Aug 2017 15:18:52 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:18:52 +0800
changeset 637
9c07ef4934dd
parent 397
b99d7e355d4b
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.xml.internal.dtdparser;
aoqi@0 27
aoqi@0 28 import org.xml.sax.EntityResolver;
aoqi@0 29 import org.xml.sax.InputSource;
aoqi@0 30
aoqi@0 31 import java.io.File;
aoqi@0 32 import java.io.FileInputStream;
aoqi@0 33 import java.io.IOException;
aoqi@0 34 import java.io.InputStream;
aoqi@0 35 import java.net.URL;
aoqi@0 36 import java.net.URLConnection;
aoqi@0 37 import java.util.Hashtable;
aoqi@0 38
aoqi@0 39 /**
aoqi@0 40 * This entity resolver class provides a number of utilities which can help
aoqi@0 41 * management of external parsed entities in XML. These are commonly used
aoqi@0 42 * to hold markup declarations that are to be used as part of a Document
aoqi@0 43 * Type Declaration (DTD), or to hold text marked up with XML.
aoqi@0 44 * <p/>
aoqi@0 45 * <P> Features include: <UL>
aoqi@0 46 * <p/>
aoqi@0 47 * <LI> Static factory methods are provided for constructing SAX InputSource
aoqi@0 48 * objects from Files, URLs, or MIME objects. This eliminates a class of
aoqi@0 49 * error-prone coding in applications.
aoqi@0 50 * <p/>
aoqi@0 51 * <LI> Character encodings for XML documents are correctly supported: <UL>
aoqi@0 52 * <p/>
aoqi@0 53 * <LI> The encodings defined in the RFCs for MIME content types
aoqi@0 54 * (2046 for general MIME, and 2376 for XML in particular), are
aoqi@0 55 * supported, handling <em>charset=...</em> attributes and accepting
aoqi@0 56 * content types which are known to be safe for use with XML;
aoqi@0 57 * <p/>
aoqi@0 58 * <LI> The character encoding autodetection algorithm identified
aoqi@0 59 * in the XML specification is used, and leverages all of
aoqi@0 60 * the JDK 1.1 (and later) character encoding support.
aoqi@0 61 * <p/>
aoqi@0 62 * <LI> The use of MIME typing may optionally be disabled, forcing the
aoqi@0 63 * use of autodetection, to support web servers which don't correctly
aoqi@0 64 * report MIME types for XML. For example, they may report text that
aoqi@0 65 * is encoded in EUC-JP as being US-ASCII text, leading to fatal
aoqi@0 66 * errors during parsing.
aoqi@0 67 * <p/>
aoqi@0 68 * <LI> The InputSource objects returned by this class always
aoqi@0 69 * have a <code>java.io.Reader</code> available as the "character
aoqi@0 70 * stream" property.
aoqi@0 71 * <p/>
aoqi@0 72 * </UL>
aoqi@0 73 * <p/>
aoqi@0 74 * <LI> Catalog entries can map public identifiers to Java resources or
aoqi@0 75 * to local URLs. These are used to reduce network dependencies and loads,
aoqi@0 76 * and will often be used for external DTD components. For example, packages
aoqi@0 77 * shipping DTD files as resources in JAR files can eliminate network traffic
aoqi@0 78 * when accessing them, and sites may provide local caches of common DTDs.
aoqi@0 79 * Note that no particular catalog syntax is supported by this class, only
aoqi@0 80 * the notion of a set of entries.
aoqi@0 81 * <p/>
aoqi@0 82 * </UL>
aoqi@0 83 * <p/>
aoqi@0 84 * <P> Subclasses can perform tasks such as supporting new URI schemes for
aoqi@0 85 * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
aoqi@0 86 * MIME entities which are part of a <em>multipart/related</em> group
aoqi@0 87 * (see RFC 2387). They may also be used to support particular catalog
aoqi@0 88 * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
aoqi@0 89 * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
aoqi@0 90 * Public Identifiers" (FPIs).
aoqi@0 91 *
aoqi@0 92 * @author David Brownell
aoqi@0 93 * @author Janet Koenig
aoqi@0 94 * @version 1.3 00/02/24
aoqi@0 95 */
aoqi@0 96 public class Resolver implements EntityResolver {
aoqi@0 97 private boolean ignoringMIME;
aoqi@0 98
aoqi@0 99 // table mapping public IDs to (local) URIs
aoqi@0 100 private Hashtable id2uri;
aoqi@0 101
aoqi@0 102 // tables mapping public IDs to resources and classloaders
aoqi@0 103 private Hashtable id2resource;
aoqi@0 104 private Hashtable id2loader;
aoqi@0 105
aoqi@0 106 //
aoqi@0 107 // table of MIME content types (less attributes!) known
aoqi@0 108 // to be mostly "OK" to use with XML MIME entities. the
aoqi@0 109 // idea is to rule out obvious braindamage ("image/jpg")
aoqi@0 110 // not the subtle stuff ("text/html") that might actually
aoqi@0 111 // be (or become) safe.
aoqi@0 112 //
aoqi@0 113 private static final String types [] = {
aoqi@0 114 "application/xml",
aoqi@0 115 "text/xml",
aoqi@0 116 "text/plain",
aoqi@0 117 "text/html", // commonly mis-inferred
aoqi@0 118 "application/x-netcdf", // this is often illegal XML
aoqi@0 119 "content/unknown"
aoqi@0 120 };
aoqi@0 121
/**
 * Creates a resolver with no catalog entries registered and with MIME
 * type handling enabled.
 */
public Resolver() {
    super();
}
aoqi@0 127
aoqi@0 128 /**
aoqi@0 129 * Returns an input source, using the MIME type information and URL
aoqi@0 130 * scheme to statically determine the correct character encoding if
aoqi@0 131 * possible and otherwise autodetecting it. MIME carefully specifies
aoqi@0 132 * the character encoding defaults, and how attributes of the content
aoqi@0 133 * type can change it. XML further specifies two mandatory encodings
aoqi@0 134 * (UTF-8 and UTF-16), and includes an XML declaration which can be
aoqi@0 135 * used to internally label most documents encoded using US-ASCII
aoqi@0 136 * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
aoqi@0 137 * more).
aoqi@0 138 * <p/>
aoqi@0 139 * <P> This method can be used to access XML documents which do not
aoqi@0 140 * have URIs (such as servlet input streams, or most JavaMail message
aoqi@0 141 * entities) and to support access methods such as HTTP POST or PUT.
aoqi@0 142 * (URLs normally return content using the GET method.)
aoqi@0 143 * <p/>
aoqi@0 144 * <P> <em> The caller should set the system ID in order for relative URIs
aoqi@0 145 * found in this document to be interpreted correctly.</em> In some cases,
aoqi@0 146 * a custom resolver will need to be used; for example, documents
aoqi@0 147 * may be grouped in a single MIME "multipart/related" bundle, and
aoqi@0 148 * relative URLs would refer to other documents in that bundle.
aoqi@0 149 *
aoqi@0 150 * @param contentType The MIME content type for the source for which
aoqi@0 151 * an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
aoqi@0 152 * @param stream The input byte stream for the input source.
aoqi@0 153 * @param checkType If true, this verifies that the content type is known
aoqi@0 154 * to support XML documents, such as <em>application/xml</em>.
aoqi@0 155 * @param scheme Unless this is "file", unspecified MIME types
aoqi@0 156 * default to US-ASCII. Files are always autodetected since most
aoqi@0 157 * file systems discard character encoding information.
aoqi@0 158 */
aoqi@0 159 public static InputSource createInputSource(String contentType,
aoqi@0 160 InputStream stream,
aoqi@0 161 boolean checkType,
aoqi@0 162 String scheme) throws IOException {
aoqi@0 163 InputSource retval;
aoqi@0 164 String charset = null;
aoqi@0 165
aoqi@0 166 if (contentType != null) {
aoqi@0 167 int index;
aoqi@0 168
aoqi@0 169 contentType = contentType.toLowerCase();
aoqi@0 170 index = contentType.indexOf(';');
aoqi@0 171 if (index != -1) {
aoqi@0 172 String attributes;
aoqi@0 173
aoqi@0 174 attributes = contentType.substring(index + 1);
aoqi@0 175 contentType = contentType.substring(0, index);
aoqi@0 176
aoqi@0 177 // use "charset=..." if it's available
aoqi@0 178 index = attributes.indexOf("charset");
aoqi@0 179 if (index != -1) {
aoqi@0 180 attributes = attributes.substring(index + 7);
aoqi@0 181 // strip out subsequent attributes
aoqi@0 182 if ((index = attributes.indexOf(';')) != -1)
aoqi@0 183 attributes = attributes.substring(0, index);
aoqi@0 184 // find start of value
aoqi@0 185 if ((index = attributes.indexOf('=')) != -1) {
aoqi@0 186 attributes = attributes.substring(index + 1);
aoqi@0 187 // strip out rfc822 comments
aoqi@0 188 if ((index = attributes.indexOf('(')) != -1)
aoqi@0 189 attributes = attributes.substring(0, index);
aoqi@0 190 // double quotes are optional
aoqi@0 191 if ((index = attributes.indexOf('"')) != -1) {
aoqi@0 192 attributes = attributes.substring(index + 1);
aoqi@0 193 attributes = attributes.substring(0,
aoqi@0 194 attributes.indexOf('"'));
aoqi@0 195 }
aoqi@0 196 charset = attributes.trim();
aoqi@0 197 // XXX "\;", "\)" etc were mishandled above
aoqi@0 198 }
aoqi@0 199 }
aoqi@0 200 }
aoqi@0 201
aoqi@0 202 //
aoqi@0 203 // Check MIME type.
aoqi@0 204 //
aoqi@0 205 if (checkType) {
aoqi@0 206 boolean isOK = false;
aoqi@0 207 for (int i = 0; i < types.length; i++)
aoqi@0 208 if (types[i].equals(contentType)) {
aoqi@0 209 isOK = true;
aoqi@0 210 break;
aoqi@0 211 }
aoqi@0 212 if (!isOK)
aoqi@0 213 throw new IOException("Not XML: " + contentType);
aoqi@0 214 }
aoqi@0 215
aoqi@0 216 //
aoqi@0 217 // "text/*" MIME types have hard-wired character set
aoqi@0 218 // defaults, as specified in the RFCs. For XML, we
aoqi@0 219 // ignore the system "file.encoding" property since
aoqi@0 220 // autodetection is more correct.
aoqi@0 221 //
aoqi@0 222 if (charset == null) {
aoqi@0 223 contentType = contentType.trim();
aoqi@0 224 if (contentType.startsWith("text/")) {
aoqi@0 225 if (!"file".equalsIgnoreCase(scheme))
aoqi@0 226 charset = "US-ASCII";
aoqi@0 227 }
aoqi@0 228 // "application/*" has no default
aoqi@0 229 }
aoqi@0 230 }
aoqi@0 231
aoqi@0 232 retval = new InputSource(XmlReader.createReader(stream, charset));
aoqi@0 233 retval.setByteStream(stream);
aoqi@0 234 retval.setEncoding(charset);
aoqi@0 235 return retval;
aoqi@0 236 }
aoqi@0 237
aoqi@0 238
aoqi@0 239 /**
aoqi@0 240 * Creates an input source from a given URI.
aoqi@0 241 *
aoqi@0 242 * @param uri the URI (system ID) for the entity
aoqi@0 243 * @param checkType if true, the MIME content type for the entity
aoqi@0 244 * is checked for document type and character set encoding.
aoqi@0 245 */
aoqi@0 246 static public InputSource createInputSource(URL uri, boolean checkType)
aoqi@0 247 throws IOException {
aoqi@0 248
aoqi@0 249 URLConnection conn = uri.openConnection();
aoqi@0 250 InputSource retval;
aoqi@0 251
aoqi@0 252 if (checkType) {
aoqi@0 253 String contentType = conn.getContentType();
aoqi@0 254 retval = createInputSource(contentType, conn.getInputStream(),
aoqi@0 255 false, uri.getProtocol());
aoqi@0 256 } else {
aoqi@0 257 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
aoqi@0 258 }
aoqi@0 259 retval.setSystemId(conn.getURL().toString());
aoqi@0 260 return retval;
aoqi@0 261 }
aoqi@0 262
aoqi@0 263
aoqi@0 264 /**
aoqi@0 265 * Creates an input source from a given file, autodetecting
aoqi@0 266 * the character encoding.
aoqi@0 267 */
aoqi@0 268 static public InputSource createInputSource(File file)
aoqi@0 269 throws IOException {
aoqi@0 270 InputSource retval;
aoqi@0 271 String path;
aoqi@0 272
aoqi@0 273 retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
aoqi@0 274
aoqi@0 275 // On JDK 1.2 and later, simplify this:
aoqi@0 276 // "path = file.toURL ().toString ()".
aoqi@0 277 path = file.getAbsolutePath();
aoqi@0 278 if (File.separatorChar != '/')
aoqi@0 279 path = path.replace(File.separatorChar, '/');
aoqi@0 280 if (!path.startsWith("/"))
aoqi@0 281 path = "/" + path;
aoqi@0 282 if (!path.endsWith("/") && file.isDirectory())
aoqi@0 283 path = path + "/";
aoqi@0 284
aoqi@0 285 retval.setSystemId("file:" + path);
aoqi@0 286 return retval;
aoqi@0 287 }
aoqi@0 288
aoqi@0 289
aoqi@0 290 /**
aoqi@0 291 * <b>SAX:</b>
aoqi@0 292 * Resolve the given entity into an input source. If the name can't
aoqi@0 293 * be mapped to a preferred form of the entity, the URI is used. To
aoqi@0 294 * resolve the entity, first a local catalog mapping names to URIs is
aoqi@0 295 * consulted. If no mapping is found there, a catalog mapping names
aoqi@0 296 * to java resources is consulted. Finally, if neither mapping found
aoqi@0 297 * a copy of the entity, the specified URI is used.
aoqi@0 298 * <p/>
aoqi@0 299 * <P> When a URI is used, <a href="#createInputSource">
aoqi@0 300 * createInputSource</a> is used to correctly deduce the character
aoqi@0 301 * encoding used by this entity. No MIME type checking is done.
aoqi@0 302 *
aoqi@0 303 * @param name Used to find alternate copies of the entity, when
aoqi@0 304 * this value is non-null; this is the XML "public ID".
aoqi@0 305 * @param uri Used when no alternate copy of the entity is found;
aoqi@0 306 * this is the XML "system ID", normally a URI.
aoqi@0 307 */
aoqi@0 308 public InputSource resolveEntity(String name, String uri)
aoqi@0 309 throws IOException {
aoqi@0 310 InputSource retval;
aoqi@0 311 String mappedURI = name2uri(name);
aoqi@0 312 InputStream stream;
aoqi@0 313
aoqi@0 314 // prefer explicit URI mappings, then bundled resources...
aoqi@0 315 if (mappedURI == null && (stream = mapResource(name)) != null) {
aoqi@0 316 uri = "java:resource:" + (String) id2resource.get(name);
aoqi@0 317 retval = new InputSource(XmlReader.createReader(stream));
aoqi@0 318
aoqi@0 319 // ...and treat all URIs the same (as URLs for now).
aoqi@0 320 } else {
aoqi@0 321 URL url;
aoqi@0 322 URLConnection conn;
aoqi@0 323
aoqi@0 324 if (mappedURI != null)
aoqi@0 325 uri = mappedURI;
aoqi@0 326 else if (uri == null)
aoqi@0 327 return null;
aoqi@0 328
aoqi@0 329 url = new URL(uri);
aoqi@0 330 conn = url.openConnection();
aoqi@0 331 uri = conn.getURL().toString();
aoqi@0 332 // System.out.println ("++ URI: " + url);
aoqi@0 333 if (ignoringMIME)
aoqi@0 334 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
aoqi@0 335 else {
aoqi@0 336 String contentType = conn.getContentType();
aoqi@0 337 retval = createInputSource(contentType,
aoqi@0 338 conn.getInputStream(),
aoqi@0 339 false, url.getProtocol());
aoqi@0 340 }
aoqi@0 341 }
aoqi@0 342 retval.setSystemId(uri);
aoqi@0 343 retval.setPublicId(name);
aoqi@0 344 return retval;
aoqi@0 345 }
aoqi@0 346
aoqi@0 347
aoqi@0 348 /**
aoqi@0 349 * Returns true if this resolver is ignoring MIME types in the documents
aoqi@0 350 * it returns, to work around bugs in how servers have reported the
aoqi@0 351 * documents' MIME types.
aoqi@0 352 */
aoqi@0 353 public boolean isIgnoringMIME() {
aoqi@0 354 return ignoringMIME;
aoqi@0 355 }
aoqi@0 356
aoqi@0 357 /**
aoqi@0 358 * Tells the resolver whether to ignore MIME types in the documents it
aoqi@0 359 * retrieves. Many web servers incorrectly assign text documents a
aoqi@0 360 * default character encoding, even when that is incorrect. For example,
aoqi@0 361 * all HTTP text documents default to use ISO-8859-1 (used for Western
aoqi@0 362 * European languages), and other MIME sources default text documents
aoqi@0 363 * to use US-ASCII (a seven bit encoding). For XML documents which
aoqi@0 364 * include text encoding declarations (as most should do), these server
aoqi@0 365 * bugs can be worked around by ignoring the MIME type entirely.
aoqi@0 366 */
aoqi@0 367 public void setIgnoringMIME(boolean value) {
aoqi@0 368 ignoringMIME = value;
aoqi@0 369 }
aoqi@0 370
aoqi@0 371
aoqi@0 372 // maps the public ID to an alternate URI, if one is registered
aoqi@0 373 private String name2uri(String publicId) {
aoqi@0 374 if (publicId == null || id2uri == null)
aoqi@0 375 return null;
aoqi@0 376 return (String) id2uri.get(publicId);
aoqi@0 377 }
aoqi@0 378
aoqi@0 379
aoqi@0 380 /**
aoqi@0 381 * Registers the given public ID as corresponding to a particular
aoqi@0 382 * URI, typically a local copy. This URI will be used in preference
aoqi@0 383 * to ones provided as system IDs in XML entity declarations. This
aoqi@0 384 * mechanism would most typically be used for Document Type Definitions
aoqi@0 385 * (DTDs), where the public IDs are formally managed and versioned.
aoqi@0 386 *
aoqi@0 387 * @param publicId The managed public ID being mapped
aoqi@0 388 * @param uri The URI of the preferred copy of that entity
aoqi@0 389 */
aoqi@0 390 public void registerCatalogEntry(String publicId,
aoqi@0 391 String uri) {
aoqi@0 392 if (id2uri == null)
aoqi@0 393 id2uri = new Hashtable(17);
aoqi@0 394 id2uri.put(publicId, uri);
aoqi@0 395 }
aoqi@0 396
aoqi@0 397
aoqi@0 398 // return the resource as a stream
aoqi@0 399 private InputStream mapResource(String publicId) {
aoqi@0 400 // System.out.println ("++ PUBLIC: " + publicId);
aoqi@0 401 if (publicId == null || id2resource == null)
aoqi@0 402 return null;
aoqi@0 403
aoqi@0 404 String resourceName = (String) id2resource.get(publicId);
aoqi@0 405 ClassLoader loader = null;
aoqi@0 406
aoqi@0 407 if (resourceName == null)
aoqi@0 408 return null;
aoqi@0 409 // System.out.println ("++ Resource: " + resourceName);
aoqi@0 410
aoqi@0 411 if (id2loader != null)
aoqi@0 412 loader = (ClassLoader) id2loader.get(publicId);
aoqi@0 413 // System.out.println ("++ Loader: " + loader);
aoqi@0 414 if (loader == null)
aoqi@0 415 return ClassLoader.getSystemResourceAsStream(resourceName);
aoqi@0 416 return loader.getResourceAsStream(resourceName);
aoqi@0 417 }
aoqi@0 418
aoqi@0 419 /**
aoqi@0 420 * Registers a given public ID as corresponding to a particular Java
aoqi@0 421 * resource in a given class loader, typically distributed with a
aoqi@0 422 * software package. This resource will be preferred over system IDs
aoqi@0 423 * included in XML documents. This mechanism should most typically be
aoqi@0 424 * used for Document Type Definitions (DTDs), where the public IDs are
aoqi@0 425 * formally managed and versioned.
aoqi@0 426 * <p/>
aoqi@0 427 * <P> If a mapping to a URI has been provided, that mapping takes
aoqi@0 428 * precedence over this one.
aoqi@0 429 *
aoqi@0 430 * @param publicId The managed public ID being mapped
aoqi@0 431 * @param resourceName The name of the Java resource
aoqi@0 432 * @param loader The class loader holding the resource, or null if
aoqi@0 433 * it is a system resource.
aoqi@0 434 */
aoqi@0 435 public void registerCatalogEntry(String publicId,
aoqi@0 436 String resourceName,
aoqi@0 437 ClassLoader loader) {
aoqi@0 438 if (id2resource == null)
aoqi@0 439 id2resource = new Hashtable(17);
aoqi@0 440 id2resource.put(publicId, resourceName);
aoqi@0 441
aoqi@0 442 if (loader != null) {
aoqi@0 443 if (id2loader == null)
aoqi@0 444 id2loader = new Hashtable(17);
aoqi@0 445 id2loader.put(publicId, loader);
aoqi@0 446 }
aoqi@0 447 }
aoqi@0 448 }

mercurial