diff -r 88b85470e72c -r f50545b5e2f1 src/share/jaxws_classes/com/sun/tools/internal/xjc/reader/internalizer/DOMForest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/jaxws_classes/com/sun/tools/internal/xjc/reader/internalizer/DOMForest.java Tue Mar 06 16:09:35 2012 -0800 @@ -0,0 +1,571 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package com.sun.tools.internal.xjc.reader.internalizer; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.sax.SAXResult; +import javax.xml.transform.sax.SAXSource; +import javax.xml.validation.SchemaFactory; + +import com.sun.istack.internal.NotNull; +import com.sun.istack.internal.XMLStreamReaderToContentHandler; +import com.sun.tools.internal.xjc.ErrorReceiver; +import com.sun.tools.internal.xjc.Options; +import com.sun.tools.internal.xjc.reader.Const; +import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker; +import com.sun.tools.internal.xjc.util.ErrorReceiverFilter; +import com.sun.xml.internal.bind.marshaller.DataWriter; +import com.sun.xml.internal.xsom.parser.JAXPParser; +import com.sun.xml.internal.xsom.parser.XMLParser; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.xml.sax.ContentHandler; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.XMLFilterImpl; + + +/** + * Builds a DOM forest and maintains association from + * system IDs to DOM trees. + * + *

+ * A forest is a transitive reflexive closure of referenced documents. + * IOW, if a document is in a forest, all the documents referenced from + * it is in a forest, too. To support this semantics, {@link DOMForest} + * uses {@link InternalizationLogic} to find referenced documents. + * + *

+ * Some documents are marked as "root"s, meaning those documents were + * put into a forest explicitly, not because it is referenced from another + * document. (However, a root document can be referenced from other + * documents, too.) + * + * @author + * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) + */ +public final class DOMForest { + /** actual data storage map<SystemId,Document>. */ + private final Map core = new HashMap(); + + /** + * To correctly feed documents to a schema parser, we need to remember + * which documents (of the forest) were given as the root + * documents, and which of them are read as included/imported + * documents. + * + *

+ * Set of system ids as strings. + */ + private final Set rootDocuments = new HashSet(); + + /** Stores location information for all the trees in this forest. */ + public final LocatorTable locatorTable = new LocatorTable(); + + /** Stores all the outer-most <jaxb:bindings> customizations. */ + public final Set outerMostBindings = new HashSet(); + + /** Used to resolve references to other schema documents. */ + private EntityResolver entityResolver = null; + + /** Errors encountered during the parsing will be sent to this object. */ + private ErrorReceiver errorReceiver = null; + + /** Schema language dependent part of the processing. */ + protected final InternalizationLogic logic; + + private final SAXParserFactory parserFactory; + private final DocumentBuilder documentBuilder; + + + public DOMForest( + SAXParserFactory parserFactory, DocumentBuilder documentBuilder, + InternalizationLogic logic ) { + + this.parserFactory = parserFactory; + this.documentBuilder = documentBuilder; + this.logic = logic; + } + + public DOMForest( InternalizationLogic logic ) { + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(true); + this.documentBuilder = dbf.newDocumentBuilder(); + + this.parserFactory = SAXParserFactory.newInstance(); + this.parserFactory.setNamespaceAware(true); + } catch( ParserConfigurationException e ) { + throw new AssertionError(e); + } + + this.logic = logic; + } + + /** + * Gets the DOM tree associated with the specified system ID, + * or null if none is found. + */ + public Document get( String systemId ) { + Document doc = core.get(systemId); + + if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { + // As of JDK1.4, java.net.URL.toExternal method returns URLs like + // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. + // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), + // and this descripancy breaks DOM look up by system ID. + + // this extra check solves this problem. + doc = core.get( "file://"+systemId.substring(5) ); + } + + if( doc==null && systemId.startsWith("file:") ) { + // on Windows, filenames are case insensitive. + // perform case-insensitive search for improved user experience + String systemPath = getPath(systemId); + for (String key : core.keySet()) { + if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { + doc = core.get(key); + break; + } + } + } + + return doc; + } + + /** + * Strips off the leading 'file:///' portion from an URL. + */ + private String getPath(String key) { + key = key.substring(5); // skip 'file:' + while(key.length()>0 && key.charAt(0)=='/') + key = key.substring(1); + return key; + } + + /** + * Returns a read-only set of root document system IDs. + */ + public Set getRootDocuments() { + return Collections.unmodifiableSet(rootDocuments); + } + + /** + * Picks one document at random and returns it. + */ + public Document getOneDocument() { + for (Document dom : core.values()) { + if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) + return dom; + } + // we should have caught this error very early on + throw new AssertionError(); + } + + /** + * Checks the correctness of the XML Schema documents and return true + * if it's OK. + * + *

+ * This method performs a weaker version of the tests where error messages + * are provided without line number information. So whenever possible + * use {@link SchemaConstraintChecker}. + * + * @see SchemaConstraintChecker + */ + public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { + try { + SchemaFactory sf = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI); + ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); + sf.setErrorHandler(filter); + Set roots = getRootDocuments(); + Source[] sources = new Source[roots.size()]; + int i=0; + for (String root : roots) { + sources[i++] = new DOMSource(get(root),root); + } + sf.newSchema(sources); + return !filter.hadError(); + } catch (SAXException e) { + // the errors should have been reported + return false; + } + } + + /** + * Gets the system ID from which the given DOM is parsed. + *

+ * Poor-man's base URI. + */ + public String getSystemId( Document dom ) { + for (Map.Entry e : core.entrySet()) { + if (e.getValue() == dom) + return e.getKey(); + } + return null; + } + + public Document parse( InputSource source, boolean root ) throws SAXException { + if( source.getSystemId()==null ) + throw new IllegalArgumentException(); + + return parse( source.getSystemId(), source, root ); + } + + /** + * Parses an XML at the given location ( + * and XMLs referenced by it) into DOM trees + * and stores them to this forest. + * + * @return the parsed DOM document object. + */ + public Document parse( String systemId, boolean root ) throws SAXException, IOException { + + systemId = Options.normalizeSystemId(systemId); + + if( core.containsKey(systemId) ) + // this document has already been parsed. Just ignore. + return core.get(systemId); + + InputSource is=null; + + // allow entity resolver to find the actual byte stream. + if( entityResolver!=null ) + is = entityResolver.resolveEntity(null,systemId); + if( is==null ) + is = new InputSource(systemId); + + // but we still use the original system Id as the key. + return parse( systemId, is, root ); + } + + /** + * Returns a {@link ContentHandler} to feed SAX events into. + * + *

+ * The client of this class can feed SAX events into the handler + * to parse a document into this DOM forest. + * + * This version requires that the DOM object to be created and registered + * to the map beforehand. + */ + private ContentHandler getParserHandler( Document dom ) { + ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); + handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); + handler = new VersionChecker(handler,errorReceiver,entityResolver); + + // insert the reference finder so that + // included/imported schemas will be also parsed + XMLFilterImpl f = logic.createExternalReferenceFinder(this); + f.setContentHandler(handler); + + if(errorReceiver!=null) + f.setErrorHandler(errorReceiver); + if(entityResolver!=null) + f.setEntityResolver(entityResolver); + + return f; + } + + public interface Handler extends ContentHandler { + /** + * Gets the DOM that was built. + */ + public Document getDocument(); + } + + private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { + } + + /** + * Returns a {@link ContentHandler} to feed SAX events into. + * + *

+ * The client of this class can feed SAX events into the handler + * to parse a document into this DOM forest. + */ + public Handler getParserHandler( String systemId, boolean root ) { + final Document dom = documentBuilder.newDocument(); + core.put( systemId, dom ); + if(root) + rootDocuments.add(systemId); + + ContentHandler handler = getParserHandler(dom); + + // we will register the DOM to the map once the system ID becomes available. + // but the SAX allows the event source to not to provide that information, + // so be prepared for such case. + HandlerImpl x = new HandlerImpl() { + public Document getDocument() { + return dom; + } + }; + x.setContentHandler(handler); + + return x; + } + + /** + * Parses the given document and add it to the DOM forest. + * + * @return + * null if there was a parse error. otherwise non-null. + */ + public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { + Document dom = documentBuilder.newDocument(); + + systemId = Options.normalizeSystemId(systemId); + + // put into the map before growing a tree, to + // prevent recursive reference from causing infinite loop. + core.put( systemId, dom ); + if(root) + rootDocuments.add(systemId); + + try { + XMLReader reader = parserFactory.newSAXParser().getXMLReader(); + reader.setContentHandler(getParserHandler(dom)); + if(errorReceiver!=null) + reader.setErrorHandler(errorReceiver); + if(entityResolver!=null) + reader.setEntityResolver(entityResolver); + reader.parse(inputSource); + } catch( ParserConfigurationException e ) { + // in practice, this exception won't happen. + errorReceiver.error(e.getMessage(),e); + core.remove(systemId); + rootDocuments.remove(systemId); + return null; + } catch( IOException e ) { + errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); + core.remove(systemId); + rootDocuments.remove(systemId); + return null; + } + + return dom; + } + + public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { + Document dom = documentBuilder.newDocument(); + + systemId = Options.normalizeSystemId(systemId); + + if(root) + rootDocuments.add(systemId); + + if(systemId==null) + throw new IllegalArgumentException("system id cannot be null"); + core.put( systemId, dom ); + + new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); + + return dom; + } + + /** + * Performs internalization. + * + * This method should be called only once, only after all the + * schemas are parsed. + * + * @return + * the returned bindings need to be applied after schema + * components are built. + */ + public SCDBasedBindingSet transform(boolean enableSCD) { + return Internalizer.transform(this,enableSCD); + } + + /** + * Performs the schema correctness check by using JAXP 1.3. + * + *

+ * This is "weak", because {@link SchemaFactory#newSchema(Source[])} + * doesn't handle inclusions very correctly (it ends up parsing it + * from its original source, not in this tree), and because + * it doesn't handle two documents for the same namespace very + * well. + * + *

+ * We should eventually fix JAXP (and Xerces), but meanwhile + * this weaker and potentially wrong correctness check is still + * better than nothing when used inside JAX-WS (JAXB CLI and Ant + * does a better job of checking this.) + * + *

+ * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. + */ + public void weakSchemaCorrectnessCheck(SchemaFactory sf) { + List sources = new ArrayList(); + for( String systemId : getRootDocuments() ) { + Document dom = get(systemId); + if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) + continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error + + SAXSource ss = createSAXSource(systemId); + try { + ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); + } catch (SAXException e) { + throw new AssertionError(e); // Xerces wants this. See 6395322. + } + sources.add(ss); + } + + try { + sf.newSchema(sources.toArray(new SAXSource[0])); + } catch (SAXException e) { + // error should have been reported. + } catch (RuntimeException e) { + // JAXP RI isn't very trustworthy when it comes to schema error check, + // and we know some cases where it just dies with NPE. So handle it gracefully. + // this masks a bug in the JAXP RI, but we need a release that we have to make. + try { + sf.getErrorHandler().warning( + new SAXParseException(Messages.format( + Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,e.getMessage()), + null,null,-1,-1,e)); + } catch (SAXException _) { + // ignore + } + } + } + + /** + * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} + * (instead of parsing the original source identified by the system ID.) + */ + public @NotNull SAXSource createSAXSource(String systemId) { + ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { + // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect + // handlers, since SAX allows handlers to be changed while parsing. + public void parse(InputSource input) throws SAXException, IOException { + createParser().parse(input, this, this, this); + } + + public void parse(String systemId) throws SAXException, IOException { + parse(new InputSource(systemId)); + } + }); + + return new SAXSource(reader,new InputSource(systemId)); + } + + /** + * Creates {@link XMLParser} for XSOM which reads documents from + * this DOMForest rather than doing a fresh parse. + * + * The net effect is that XSOM will read transformed XML Schemas + * instead of the original documents. + */ + public XMLParser createParser() { + return new DOMForestParser(this,new JAXPParser()); + } + + + + public EntityResolver getEntityResolver() { + return entityResolver; + } + + public void setEntityResolver(EntityResolver entityResolver) { + this.entityResolver = entityResolver; + } + + public ErrorReceiver getErrorHandler() { + return errorReceiver; + } + + public void setErrorHandler(ErrorReceiver errorHandler) { + this.errorReceiver = errorHandler; + } + + /** + * Gets all the parsed documents. + */ + public Document[] listDocuments() { + return core.values().toArray(new Document[core.size()]); + } + + /** + * Gets all the system IDs of the documents. + */ + public String[] listSystemIDs() { + return core.keySet().toArray(new String[core.keySet().size()]); + } + + /** + * Dumps the contents of the forest to the specified stream. + * + * This is a debug method. As such, error handling is sloppy. + */ + public void dump( OutputStream out ) throws IOException { + try { + // create identity transformer + Transformer it = TransformerFactory.newInstance().newTransformer(); + + for (Map.Entry e : core.entrySet()) { + out.write( ("---<< "+e.getKey()+'\n').getBytes() ); + + DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); + dw.setIndentStep(" "); + it.transform( new DOMSource(e.getValue()), + new SAXResult(dw)); + + out.write( "\n\n\n".getBytes() ); + } + } catch( TransformerException e ) { + e.printStackTrace(); + } + } +}