src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java

Tue, 06 Mar 2012 16:09:35 -0800

author
ohair
date
Tue, 06 Mar 2012 16:09:35 -0800
changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
permissions
-rw-r--r--

7150322: Stop using drop source bundles in jaxws
Reviewed-by: darcy, ohrstrom

ohair@286 1 /*
ohair@286 2 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
ohair@286 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
ohair@286 4 *
ohair@286 5 * This code is free software; you can redistribute it and/or modify it
ohair@286 6 * under the terms of the GNU General Public License version 2 only, as
ohair@286 7 * published by the Free Software Foundation. Oracle designates this
ohair@286 8 * particular file as subject to the "Classpath" exception as provided
ohair@286 9 * by Oracle in the LICENSE file that accompanied this code.
ohair@286 10 *
ohair@286 11 * This code is distributed in the hope that it will be useful, but WITHOUT
ohair@286 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
ohair@286 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
ohair@286 14 * version 2 for more details (a copy is included in the LICENSE file that
ohair@286 15 * accompanied this code).
ohair@286 16 *
ohair@286 17 * You should have received a copy of the GNU General Public License version
ohair@286 18 * 2 along with this work; if not, write to the Free Software Foundation,
ohair@286 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
ohair@286 20 *
ohair@286 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@286 22 * or visit www.oracle.com if you need additional information or have any
ohair@286 23 * questions.
ohair@286 24 */
ohair@286 25
ohair@286 26 package com.sun.xml.internal.dtdparser;
ohair@286 27
ohair@286 28 import org.xml.sax.EntityResolver;
ohair@286 29 import org.xml.sax.InputSource;
ohair@286 30
ohair@286 31 import java.io.File;
ohair@286 32 import java.io.FileInputStream;
ohair@286 33 import java.io.IOException;
ohair@286 34 import java.io.InputStream;
ohair@286 35 import java.net.URL;
ohair@286 36 import java.net.URLConnection;
ohair@286 37 import java.util.Hashtable;
ohair@286 38
ohair@286 39 /**
ohair@286 40 * This entity resolver class provides a number of utilities which can help
ohair@286 41 * managment of external parsed entities in XML. These are commonly used
ohair@286 42 * to hold markup declarations that are to be used as part of a Document
ohair@286 43 * Type Declaration (DTD), or to hold text marked up with XML.
ohair@286 44 * <p/>
ohair@286 45 * <P> Features include: <UL>
ohair@286 46 * <p/>
ohair@286 47 * <LI> Static factory methods are provided for constructing SAX InputSource
ohair@286 48 * objects from Files, URLs, or MIME objects. This eliminates a class of
ohair@286 49 * error-prone coding in applications.
ohair@286 50 * <p/>
ohair@286 51 * <LI> Character encodings for XML documents are correctly supported: <UL>
ohair@286 52 * <p/>
ohair@286 53 * <LI> The encodings defined in the RFCs for MIME content types
ohair@286 54 * (2046 for general MIME, and 2376 for XML in particular), are
ohair@286 55 * supported, handling <em>charset=...</em> attributes and accepting
ohair@286 56 * content types which are known to be safe for use with XML;
ohair@286 57 * <p/>
ohair@286 58 * <LI> The character encoding autodetection algorithm identified
ohair@286 59 * in the XML specification is used, and leverages all of
ohair@286 60 * the JDK 1.1 (and later) character encoding support.
ohair@286 61 * <p/>
ohair@286 62 * <LI> The use of MIME typing may optionally be disabled, forcing the
ohair@286 63 * use of autodetection, to support web servers which don't correctly
ohair@286 64 * report MIME types for XML. For example, they may report text that
ohair@286 65 * is encoded in EUC-JP as being US-ASCII text, leading to fatal
ohair@286 66 * errors during parsing.
ohair@286 67 * <p/>
ohair@286 68 * <LI> The InputSource objects returned by this class always
ohair@286 69 * have a <code>java.io.Reader</code> available as the "character
ohair@286 70 * stream" property.
ohair@286 71 * <p/>
ohair@286 72 * </UL>
ohair@286 73 * <p/>
ohair@286 74 * <LI> Catalog entries can map public identifiers to Java resources or
ohair@286 75 * to local URLs. These are used to reduce network dependencies and loads,
ohair@286 76 * and will often be used for external DTD components. For example, packages
ohair@286 77 * shipping DTD files as resources in JAR files can eliminate network traffic
ohair@286 78 * when accessing them, and sites may provide local caches of common DTDs.
ohair@286 79 * Note that no particular catalog syntax is supported by this class, only
ohair@286 80 * the notion of a set of entries.
ohair@286 81 * <p/>
ohair@286 82 * </UL>
ohair@286 83 * <p/>
ohair@286 84 * <P> Subclasses can perform tasks such as supporting new URI schemes for
ohair@286 85 * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
ohair@286 86 * MIME entities which are part of a <em>multipart/related</em> group
ohair@286 87 * (see RFC 2387). They may also be used to support particular catalog
ohair@286 88 * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
ohair@286 89 * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
ohair@286 90 * Public Identifiers (FPIs).
ohair@286 91 *
ohair@286 92 * @author David Brownell
ohair@286 93 * @author Janet Koenig
ohair@286 94 * @version 1.3 00/02/24
ohair@286 95 */
ohair@286 96 public class Resolver implements EntityResolver {
ohair@286 97 private boolean ignoringMIME;
ohair@286 98
ohair@286 99 // table mapping public IDs to (local) URIs
ohair@286 100 private Hashtable id2uri;
ohair@286 101
ohair@286 102 // tables mapping public IDs to resources and classloaders
ohair@286 103 private Hashtable id2resource;
ohair@286 104 private Hashtable id2loader;
ohair@286 105
ohair@286 106 //
ohair@286 107 // table of MIME content types (less attributes!) known
ohair@286 108 // to be mostly "OK" to use with XML MIME entities. the
ohair@286 109 // idea is to rule out obvious braindamage ("image/jpg")
ohair@286 110 // not the subtle stuff ("text/html") that might actually
ohair@286 111 // be (or become) safe.
ohair@286 112 //
ohair@286 113 private static final String types [] = {
ohair@286 114 "application/xml",
ohair@286 115 "text/xml",
ohair@286 116 "text/plain",
ohair@286 117 "text/html", // commonly mis-inferred
ohair@286 118 "application/x-netcdf", // this is often illegal XML
ohair@286 119 "content/unknown"
ohair@286 120 };
ohair@286 121
ohair@286 122 /**
ohair@286 123 * Constructs a resolver.
ohair@286 124 */
ohair@286 125 public Resolver() {
ohair@286 126 }
ohair@286 127
ohair@286 128 /**
ohair@286 129 * Returns an input source, using the MIME type information and URL
ohair@286 130 * scheme to statically determine the correct character encoding if
ohair@286 131 * possible and otherwise autodetecting it. MIME carefully specifies
ohair@286 132 * the character encoding defaults, and how attributes of the content
ohair@286 133 * type can change it. XML further specifies two mandatory encodings
ohair@286 134 * (UTF-8 and UTF-16), and includes an XML declaration which can be
ohair@286 135 * used to internally label most documents encoded using US-ASCII
ohair@286 136 * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
ohair@286 137 * more).
ohair@286 138 * <p/>
ohair@286 139 * <P> This method can be used to access XML documents which do not
ohair@286 140 * have URIs (such as servlet input streams, or most JavaMail message
ohair@286 141 * entities) and to support access methods such as HTTP POST or PUT.
ohair@286 142 * (URLs normally return content using the GET method.)
ohair@286 143 * <p/>
ohair@286 144 * <P> <em> The caller should set the system ID in order for relative URIs
ohair@286 145 * found in this document to be interpreted correctly.</em> In some cases,
ohair@286 146 * a custom resolver will need to be used; for example, documents
ohair@286 147 * may be grouped in a single MIME "multipart/related" bundle, and
ohair@286 148 * relative URLs would refer to other documents in that bundle.
ohair@286 149 *
ohair@286 150 * @param contentType The MIME content type for the source for which
ohair@286 151 * an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
ohair@286 152 * @param stream The input byte stream for the input source.
ohair@286 153 * @param checkType If true, this verifies that the content type is known
ohair@286 154 * to support XML documents, such as <em>application/xml</em>.
ohair@286 155 * @param scheme Unless this is "file", unspecified MIME types
ohair@286 156 * default to US-ASCII. Files are always autodetected since most
ohair@286 157 * file systems discard character encoding information.
ohair@286 158 */
ohair@286 159 public static InputSource createInputSource(String contentType,
ohair@286 160 InputStream stream,
ohair@286 161 boolean checkType,
ohair@286 162 String scheme) throws IOException {
ohair@286 163 InputSource retval;
ohair@286 164 String charset = null;
ohair@286 165
ohair@286 166 if (contentType != null) {
ohair@286 167 int index;
ohair@286 168
ohair@286 169 contentType = contentType.toLowerCase();
ohair@286 170 index = contentType.indexOf(';');
ohair@286 171 if (index != -1) {
ohair@286 172 String attributes;
ohair@286 173
ohair@286 174 attributes = contentType.substring(index + 1);
ohair@286 175 contentType = contentType.substring(0, index);
ohair@286 176
ohair@286 177 // use "charset=..." if it's available
ohair@286 178 index = attributes.indexOf("charset");
ohair@286 179 if (index != -1) {
ohair@286 180 attributes = attributes.substring(index + 7);
ohair@286 181 // strip out subsequent attributes
ohair@286 182 if ((index = attributes.indexOf(';')) != -1)
ohair@286 183 attributes = attributes.substring(0, index);
ohair@286 184 // find start of value
ohair@286 185 if ((index = attributes.indexOf('=')) != -1) {
ohair@286 186 attributes = attributes.substring(index + 1);
ohair@286 187 // strip out rfc822 comments
ohair@286 188 if ((index = attributes.indexOf('(')) != -1)
ohair@286 189 attributes = attributes.substring(0, index);
ohair@286 190 // double quotes are optional
ohair@286 191 if ((index = attributes.indexOf('"')) != -1) {
ohair@286 192 attributes = attributes.substring(index + 1);
ohair@286 193 attributes = attributes.substring(0,
ohair@286 194 attributes.indexOf('"'));
ohair@286 195 }
ohair@286 196 charset = attributes.trim();
ohair@286 197 // XXX "\;", "\)" etc were mishandled above
ohair@286 198 }
ohair@286 199 }
ohair@286 200 }
ohair@286 201
ohair@286 202 //
ohair@286 203 // Check MIME type.
ohair@286 204 //
ohair@286 205 if (checkType) {
ohair@286 206 boolean isOK = false;
ohair@286 207 for (int i = 0; i < types.length; i++)
ohair@286 208 if (types[i].equals(contentType)) {
ohair@286 209 isOK = true;
ohair@286 210 break;
ohair@286 211 }
ohair@286 212 if (!isOK)
ohair@286 213 throw new IOException("Not XML: " + contentType);
ohair@286 214 }
ohair@286 215
ohair@286 216 //
ohair@286 217 // "text/*" MIME types have hard-wired character set
ohair@286 218 // defaults, as specified in the RFCs. For XML, we
ohair@286 219 // ignore the system "file.encoding" property since
ohair@286 220 // autodetection is more correct.
ohair@286 221 //
ohair@286 222 if (charset == null) {
ohair@286 223 contentType = contentType.trim();
ohair@286 224 if (contentType.startsWith("text/")) {
ohair@286 225 if (!"file".equalsIgnoreCase(scheme))
ohair@286 226 charset = "US-ASCII";
ohair@286 227 }
ohair@286 228 // "application/*" has no default
ohair@286 229 }
ohair@286 230 }
ohair@286 231
ohair@286 232 retval = new InputSource(XmlReader.createReader(stream, charset));
ohair@286 233 retval.setByteStream(stream);
ohair@286 234 retval.setEncoding(charset);
ohair@286 235 return retval;
ohair@286 236 }
ohair@286 237
ohair@286 238
ohair@286 239 /**
ohair@286 240 * Creates an input source from a given URI.
ohair@286 241 *
ohair@286 242 * @param uri the URI (system ID) for the entity
ohair@286 243 * @param checkType if true, the MIME content type for the entity
ohair@286 244 * is checked for document type and character set encoding.
ohair@286 245 */
ohair@286 246 static public InputSource createInputSource(URL uri, boolean checkType)
ohair@286 247 throws IOException {
ohair@286 248
ohair@286 249 URLConnection conn = uri.openConnection();
ohair@286 250 InputSource retval;
ohair@286 251
ohair@286 252 if (checkType) {
ohair@286 253 String contentType = conn.getContentType();
ohair@286 254 retval = createInputSource(contentType, conn.getInputStream(),
ohair@286 255 false, uri.getProtocol());
ohair@286 256 } else {
ohair@286 257 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
ohair@286 258 }
ohair@286 259 retval.setSystemId(conn.getURL().toString());
ohair@286 260 return retval;
ohair@286 261 }
ohair@286 262
ohair@286 263
ohair@286 264 /**
ohair@286 265 * Creates an input source from a given file, autodetecting
ohair@286 266 * the character encoding.
ohair@286 267 */
ohair@286 268 static public InputSource createInputSource(File file)
ohair@286 269 throws IOException {
ohair@286 270 InputSource retval;
ohair@286 271 String path;
ohair@286 272
ohair@286 273 retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
ohair@286 274
ohair@286 275 // On JDK 1.2 and later, simplify this:
ohair@286 276 // "path = file.toURL ().toString ()".
ohair@286 277 path = file.getAbsolutePath();
ohair@286 278 if (File.separatorChar != '/')
ohair@286 279 path = path.replace(File.separatorChar, '/');
ohair@286 280 if (!path.startsWith("/"))
ohair@286 281 path = "/" + path;
ohair@286 282 if (!path.endsWith("/") && file.isDirectory())
ohair@286 283 path = path + "/";
ohair@286 284
ohair@286 285 retval.setSystemId("file:" + path);
ohair@286 286 return retval;
ohair@286 287 }
ohair@286 288
ohair@286 289
ohair@286 290 /**
ohair@286 291 * <b>SAX:</b>
ohair@286 292 * Resolve the given entity into an input source. If the name can't
ohair@286 293 * be mapped to a preferred form of the entity, the URI is used. To
ohair@286 294 * resolve the entity, first a local catalog mapping names to URIs is
ohair@286 295 * consulted. If no mapping is found there, a catalog mapping names
ohair@286 296 * to java resources is consulted. Finally, if neither mapping found
ohair@286 297 * a copy of the entity, the specified URI is used.
ohair@286 298 * <p/>
ohair@286 299 * <P> When a URI is used, <a href="#createInputSource">
ohair@286 300 * createInputSource</a> is used to correctly deduce the character
ohair@286 301 * encoding used by this entity. No MIME type checking is done.
ohair@286 302 *
ohair@286 303 * @param name Used to find alternate copies of the entity, when
ohair@286 304 * this value is non-null; this is the XML "public ID".
ohair@286 305 * @param uri Used when no alternate copy of the entity is found;
ohair@286 306 * this is the XML "system ID", normally a URI.
ohair@286 307 */
ohair@286 308 public InputSource resolveEntity(String name, String uri)
ohair@286 309 throws IOException {
ohair@286 310 InputSource retval;
ohair@286 311 String mappedURI = name2uri(name);
ohair@286 312 InputStream stream;
ohair@286 313
ohair@286 314 // prefer explicit URI mappings, then bundled resources...
ohair@286 315 if (mappedURI == null && (stream = mapResource(name)) != null) {
ohair@286 316 uri = "java:resource:" + (String) id2resource.get(name);
ohair@286 317 retval = new InputSource(XmlReader.createReader(stream));
ohair@286 318
ohair@286 319 // ...and treat all URIs the same (as URLs for now).
ohair@286 320 } else {
ohair@286 321 URL url;
ohair@286 322 URLConnection conn;
ohair@286 323
ohair@286 324 if (mappedURI != null)
ohair@286 325 uri = mappedURI;
ohair@286 326 else if (uri == null)
ohair@286 327 return null;
ohair@286 328
ohair@286 329 url = new URL(uri);
ohair@286 330 conn = url.openConnection();
ohair@286 331 uri = conn.getURL().toString();
ohair@286 332 // System.out.println ("++ URI: " + url);
ohair@286 333 if (ignoringMIME)
ohair@286 334 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
ohair@286 335 else {
ohair@286 336 String contentType = conn.getContentType();
ohair@286 337 retval = createInputSource(contentType,
ohair@286 338 conn.getInputStream(),
ohair@286 339 false, url.getProtocol());
ohair@286 340 }
ohair@286 341 }
ohair@286 342 retval.setSystemId(uri);
ohair@286 343 retval.setPublicId(name);
ohair@286 344 return retval;
ohair@286 345 }
ohair@286 346
ohair@286 347
ohair@286 348 /**
ohair@286 349 * Returns true if this resolver is ignoring MIME types in the documents
ohair@286 350 * it returns, to work around bugs in how servers have reported the
ohair@286 351 * documents' MIME types.
ohair@286 352 */
ohair@286 353 public boolean isIgnoringMIME() {
ohair@286 354 return ignoringMIME;
ohair@286 355 }
ohair@286 356
ohair@286 357 /**
ohair@286 358 * Tells the resolver whether to ignore MIME types in the documents it
ohair@286 359 * retrieves. Many web servers incorrectly assign text documents a
ohair@286 360 * default character encoding, even when that is incorrect. For example,
ohair@286 361 * all HTTP text documents default to use ISO-8859-1 (used for Western
ohair@286 362 * European languages), and other MIME sources default text documents
ohair@286 363 * to use US-ASCII (a seven bit encoding). For XML documents which
ohair@286 364 * include text encoding declarations (as most should do), these server
ohair@286 365 * bugs can be worked around by ignoring the MIME type entirely.
ohair@286 366 */
ohair@286 367 public void setIgnoringMIME(boolean value) {
ohair@286 368 ignoringMIME = value;
ohair@286 369 }
ohair@286 370
ohair@286 371
ohair@286 372 // maps the public ID to an alternate URI, if one is registered
ohair@286 373 private String name2uri(String publicId) {
ohair@286 374 if (publicId == null || id2uri == null)
ohair@286 375 return null;
ohair@286 376 return (String) id2uri.get(publicId);
ohair@286 377 }
ohair@286 378
ohair@286 379
ohair@286 380 /**
ohair@286 381 * Registers the given public ID as corresponding to a particular
ohair@286 382 * URI, typically a local copy. This URI will be used in preference
ohair@286 383 * to ones provided as system IDs in XML entity declarations. This
ohair@286 384 * mechanism would most typically be used for Document Type Definitions
ohair@286 385 * (DTDs), where the public IDs are formally managed and versioned.
ohair@286 386 *
ohair@286 387 * @param publicId The managed public ID being mapped
ohair@286 388 * @param uri The URI of the preferred copy of that entity
ohair@286 389 */
ohair@286 390 public void registerCatalogEntry(String publicId,
ohair@286 391 String uri) {
ohair@286 392 if (id2uri == null)
ohair@286 393 id2uri = new Hashtable(17);
ohair@286 394 id2uri.put(publicId, uri);
ohair@286 395 }
ohair@286 396
ohair@286 397
ohair@286 398 // return the resource as a stream
ohair@286 399 private InputStream mapResource(String publicId) {
ohair@286 400 // System.out.println ("++ PUBLIC: " + publicId);
ohair@286 401 if (publicId == null || id2resource == null)
ohair@286 402 return null;
ohair@286 403
ohair@286 404 String resourceName = (String) id2resource.get(publicId);
ohair@286 405 ClassLoader loader = null;
ohair@286 406
ohair@286 407 if (resourceName == null)
ohair@286 408 return null;
ohair@286 409 // System.out.println ("++ Resource: " + resourceName);
ohair@286 410
ohair@286 411 if (id2loader != null)
ohair@286 412 loader = (ClassLoader) id2loader.get(publicId);
ohair@286 413 // System.out.println ("++ Loader: " + loader);
ohair@286 414 if (loader == null)
ohair@286 415 return ClassLoader.getSystemResourceAsStream(resourceName);
ohair@286 416 return loader.getResourceAsStream(resourceName);
ohair@286 417 }
ohair@286 418
ohair@286 419 /**
ohair@286 420 * Registers a given public ID as corresponding to a particular Java
ohair@286 421 * resource in a given class loader, typically distributed with a
ohair@286 422 * software package. This resource will be preferred over system IDs
ohair@286 423 * included in XML documents. This mechanism should most typically be
ohair@286 424 * used for Document Type Definitions (DTDs), where the public IDs are
ohair@286 425 * formally managed and versioned.
ohair@286 426 * <p/>
ohair@286 427 * <P> If a mapping to a URI has been provided, that mapping takes
ohair@286 428 * precedence over this one.
ohair@286 429 *
ohair@286 430 * @param publicId The managed public ID being mapped
ohair@286 431 * @param resourceName The name of the Java resource
ohair@286 432 * @param loader The class loader holding the resource, or null if
ohair@286 433 * it is a system resource.
ohair@286 434 */
ohair@286 435 public void registerCatalogEntry(String publicId,
ohair@286 436 String resourceName,
ohair@286 437 ClassLoader loader) {
ohair@286 438 if (id2resource == null)
ohair@286 439 id2resource = new Hashtable(17);
ohair@286 440 id2resource.put(publicId, resourceName);
ohair@286 441
ohair@286 442 if (loader != null) {
ohair@286 443 if (id2loader == null)
ohair@286 444 id2loader = new Hashtable(17);
ohair@286 445 id2loader.put(publicId, loader);
ohair@286 446 }
ohair@286 447 }
ohair@286 448 }

mercurial