ohair@286: /* alanb@368: * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ohair@286: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ohair@286: * ohair@286: * This code is free software; you can redistribute it and/or modify it ohair@286: * under the terms of the GNU General Public License version 2 only, as ohair@286: * published by the Free Software Foundation. Oracle designates this ohair@286: * particular file as subject to the "Classpath" exception as provided ohair@286: * by Oracle in the LICENSE file that accompanied this code. ohair@286: * ohair@286: * This code is distributed in the hope that it will be useful, but WITHOUT ohair@286: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ohair@286: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ohair@286: * version 2 for more details (a copy is included in the LICENSE file that ohair@286: * accompanied this code). ohair@286: * ohair@286: * You should have received a copy of the GNU General Public License version ohair@286: * 2 along with this work; if not, write to the Free Software Foundation, ohair@286: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ohair@286: * ohair@286: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ohair@286: * or visit www.oracle.com if you need additional information or have any ohair@286: * questions. ohair@286: */ ohair@286: ohair@286: package com.sun.tools.internal.xjc.reader.internalizer; ohair@286: mkos@397: import com.sun.istack.internal.NotNull; mkos@397: import com.sun.istack.internal.XMLStreamReaderToContentHandler; mkos@397: import com.sun.tools.internal.xjc.ErrorReceiver; mkos@397: import com.sun.tools.internal.xjc.Options; mkos@397: import com.sun.tools.internal.xjc.reader.Const; mkos@397: import com.sun.tools.internal.xjc.util.ErrorReceiverFilter; mkos@397: import com.sun.xml.internal.bind.marshaller.DataWriter; mkos@397: import com.sun.xml.internal.bind.v2.util.XmlFactory; mkos@397: import com.sun.xml.internal.xsom.parser.JAXPParser; mkos@397: import com.sun.xml.internal.xsom.parser.XMLParser; mkos@397: import org.w3c.dom.Document; mkos@397: import org.w3c.dom.Element; mkos@397: import org.xml.sax.*; mkos@397: import org.xml.sax.helpers.XMLFilterImpl; ohair@286: ohair@286: import javax.xml.parsers.DocumentBuilder; ohair@286: import javax.xml.parsers.DocumentBuilderFactory; ohair@286: import javax.xml.parsers.ParserConfigurationException; ohair@286: import javax.xml.parsers.SAXParserFactory; ohair@286: import javax.xml.stream.XMLStreamException; ohair@286: import javax.xml.stream.XMLStreamReader; ohair@286: import javax.xml.transform.Source; ohair@286: import javax.xml.transform.Transformer; ohair@286: import javax.xml.transform.TransformerException; ohair@286: import javax.xml.transform.TransformerFactory; ohair@286: import javax.xml.transform.dom.DOMSource; ohair@286: import javax.xml.transform.sax.SAXResult; ohair@286: import javax.xml.transform.sax.SAXSource; ohair@286: import javax.xml.validation.SchemaFactory; mkos@397: import java.io.IOException; mkos@397: import java.io.OutputStream; mkos@397: import java.io.OutputStreamWriter; mkos@397: import java.util.*; ohair@286: mkos@408: import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess; mkos@397: import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; ohair@286: ohair@286: ohair@286: /** ohair@286: * Builds a DOM forest and maintains association from ohair@286: * system IDs to DOM trees. ohair@286: * ohair@286: *

ohair@286: * A forest is a transitive reflexive closure of referenced documents. ohair@286: * IOW, if a document is in a forest, all the documents referenced from ohair@286: * it is in a forest, too. To support this semantics, {@link DOMForest} ohair@286: * uses {@link InternalizationLogic} to find referenced documents. ohair@286: * ohair@286: *

ohair@286: * Some documents are marked as "root"s, meaning those documents were ohair@286: * put into a forest explicitly, not because it is referenced from another ohair@286: * document. (However, a root document can be referenced from other ohair@286: * documents, too.) ohair@286: * ohair@286: * @author ohair@286: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) ohair@286: */ ohair@286: public final class DOMForest { ohair@286: /** actual data storage map<SystemId,Document>. */ ohair@286: private final Map core = new HashMap(); ohair@286: ohair@286: /** ohair@286: * To correctly feed documents to a schema parser, we need to remember ohair@286: * which documents (of the forest) were given as the root ohair@286: * documents, and which of them are read as included/imported ohair@286: * documents. ohair@286: * ohair@286: *

ohair@286: * Set of system ids as strings. ohair@286: */ ohair@286: private final Set rootDocuments = new HashSet(); ohair@286: ohair@286: /** Stores location information for all the trees in this forest. */ ohair@286: public final LocatorTable locatorTable = new LocatorTable(); ohair@286: ohair@286: /** Stores all the outer-most <jaxb:bindings> customizations. */ ohair@286: public final Set outerMostBindings = new HashSet(); ohair@286: ohair@286: /** Used to resolve references to other schema documents. */ ohair@286: private EntityResolver entityResolver = null; ohair@286: ohair@286: /** Errors encountered during the parsing will be sent to this object. */ ohair@286: private ErrorReceiver errorReceiver = null; ohair@286: ohair@286: /** Schema language dependent part of the processing. */ ohair@286: protected final InternalizationLogic logic; ohair@286: ohair@286: private final SAXParserFactory parserFactory; ohair@286: private final DocumentBuilder documentBuilder; ohair@286: alanb@368: private final Options options; ohair@286: ohair@286: public DOMForest( ohair@286: SAXParserFactory parserFactory, DocumentBuilder documentBuilder, ohair@286: InternalizationLogic logic ) { ohair@286: ohair@286: this.parserFactory = parserFactory; ohair@286: this.documentBuilder = documentBuilder; ohair@286: this.logic = logic; alanb@368: this.options = null; ohair@286: } ohair@286: alanb@368: public DOMForest( InternalizationLogic logic, Options opt ) { alanb@368: alanb@368: if (opt == null) throw new AssertionError("Options object null"); alanb@368: this.options = opt; alanb@368: ohair@286: try { alanb@368: DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity); ohair@286: this.documentBuilder = dbf.newDocumentBuilder(); alanb@368: this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity); ohair@286: } catch( ParserConfigurationException e ) { ohair@286: throw new AssertionError(e); ohair@286: } ohair@286: ohair@286: this.logic = logic; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Gets the DOM tree associated with the specified system ID, ohair@286: * or null if none is found. ohair@286: */ ohair@286: public Document get( String systemId ) { ohair@286: Document doc = core.get(systemId); ohair@286: ohair@286: if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { ohair@286: // As of JDK1.4, java.net.URL.toExternal method returns URLs like ohair@286: // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. ohair@286: // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), ohair@286: // and this descripancy breaks DOM look up by system ID. ohair@286: ohair@286: // this extra check solves this problem. ohair@286: doc = core.get( "file://"+systemId.substring(5) ); ohair@286: } ohair@286: ohair@286: if( doc==null && systemId.startsWith("file:") ) { ohair@286: // on Windows, filenames are case insensitive. ohair@286: // perform case-insensitive search for improved user experience ohair@286: String systemPath = getPath(systemId); ohair@286: for (String key : core.keySet()) { ohair@286: if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { ohair@286: doc = core.get(key); ohair@286: break; ohair@286: } ohair@286: } ohair@286: } ohair@286: ohair@286: return doc; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Strips off the leading 'file:///' portion from an URL. ohair@286: */ ohair@286: private String getPath(String key) { ohair@286: key = key.substring(5); // skip 'file:' alanb@368: while(key.length()>0 && key.charAt(0)=='/') { ohair@286: key = key.substring(1); alanb@368: } ohair@286: return key; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns a read-only set of root document system IDs. ohair@286: */ ohair@286: public Set getRootDocuments() { ohair@286: return Collections.unmodifiableSet(rootDocuments); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Picks one document at random and returns it. ohair@286: */ ohair@286: public Document getOneDocument() { ohair@286: for (Document dom : core.values()) { ohair@286: if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) ohair@286: return dom; ohair@286: } ohair@286: // we should have caught this error very early on ohair@286: throw new AssertionError(); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Checks the correctness of the XML Schema documents and return true ohair@286: * if it's OK. ohair@286: * ohair@286: *

ohair@286: * This method performs a weaker version of the tests where error messages ohair@286: * are provided without line number information. So whenever possible ohair@286: * use {@link SchemaConstraintChecker}. ohair@286: * ohair@286: * @see SchemaConstraintChecker ohair@286: */ ohair@286: public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { ohair@286: try { alanb@368: boolean disableXmlSecurity = false; alanb@368: if (options != null) { alanb@368: disableXmlSecurity = options.disableXmlSecurity; alanb@368: } alanb@368: SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity); ohair@286: ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); ohair@286: sf.setErrorHandler(filter); ohair@286: Set roots = getRootDocuments(); ohair@286: Source[] sources = new Source[roots.size()]; ohair@286: int i=0; ohair@286: for (String root : roots) { ohair@286: sources[i++] = new DOMSource(get(root),root); ohair@286: } ohair@286: sf.newSchema(sources); ohair@286: return !filter.hadError(); ohair@286: } catch (SAXException e) { ohair@286: // the errors should have been reported ohair@286: return false; ohair@286: } ohair@286: } ohair@286: ohair@286: /** ohair@286: * Gets the system ID from which the given DOM is parsed. ohair@286: *

ohair@286: * Poor-man's base URI. ohair@286: */ ohair@286: public String getSystemId( Document dom ) { ohair@286: for (Map.Entry e : core.entrySet()) { ohair@286: if (e.getValue() == dom) ohair@286: return e.getKey(); ohair@286: } ohair@286: return null; ohair@286: } ohair@286: ohair@286: public Document parse( InputSource source, boolean root ) throws SAXException { ohair@286: if( source.getSystemId()==null ) ohair@286: throw new IllegalArgumentException(); ohair@286: ohair@286: return parse( source.getSystemId(), source, root ); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Parses an XML at the given location ( ohair@286: * and XMLs referenced by it) into DOM trees ohair@286: * and stores them to this forest. ohair@286: * ohair@286: * @return the parsed DOM document object. ohair@286: */ ohair@286: public Document parse( String systemId, boolean root ) throws SAXException, IOException { ohair@286: ohair@286: systemId = Options.normalizeSystemId(systemId); ohair@286: ohair@286: if( core.containsKey(systemId) ) ohair@286: // this document has already been parsed. Just ignore. ohair@286: return core.get(systemId); ohair@286: ohair@286: InputSource is=null; ohair@286: ohair@286: // allow entity resolver to find the actual byte stream. ohair@286: if( entityResolver!=null ) ohair@286: is = entityResolver.resolveEntity(null,systemId); ohair@286: if( is==null ) ohair@286: is = new InputSource(systemId); ohair@286: ohair@286: // but we still use the original system Id as the key. ohair@286: return parse( systemId, is, root ); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns a {@link ContentHandler} to feed SAX events into. ohair@286: * ohair@286: *

ohair@286: * The client of this class can feed SAX events into the handler ohair@286: * to parse a document into this DOM forest. ohair@286: * ohair@286: * This version requires that the DOM object to be created and registered ohair@286: * to the map beforehand. ohair@286: */ ohair@286: private ContentHandler getParserHandler( Document dom ) { ohair@286: ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); ohair@286: handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); ohair@286: handler = new VersionChecker(handler,errorReceiver,entityResolver); ohair@286: ohair@286: // insert the reference finder so that ohair@286: // included/imported schemas will be also parsed ohair@286: XMLFilterImpl f = logic.createExternalReferenceFinder(this); ohair@286: f.setContentHandler(handler); ohair@286: ohair@286: if(errorReceiver!=null) ohair@286: f.setErrorHandler(errorReceiver); ohair@286: if(entityResolver!=null) ohair@286: f.setEntityResolver(entityResolver); ohair@286: ohair@286: return f; ohair@286: } ohair@286: ohair@286: public interface Handler extends ContentHandler { ohair@286: /** ohair@286: * Gets the DOM that was built. ohair@286: */ ohair@286: public Document getDocument(); ohair@286: } ohair@286: ohair@286: private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns a {@link ContentHandler} to feed SAX events into. ohair@286: * ohair@286: *

ohair@286: * The client of this class can feed SAX events into the handler ohair@286: * to parse a document into this DOM forest. ohair@286: */ ohair@286: public Handler getParserHandler( String systemId, boolean root ) { ohair@286: final Document dom = documentBuilder.newDocument(); ohair@286: core.put( systemId, dom ); ohair@286: if(root) ohair@286: rootDocuments.add(systemId); ohair@286: ohair@286: ContentHandler handler = getParserHandler(dom); ohair@286: ohair@286: // we will register the DOM to the map once the system ID becomes available. ohair@286: // but the SAX allows the event source to not to provide that information, ohair@286: // so be prepared for such case. ohair@286: HandlerImpl x = new HandlerImpl() { ohair@286: public Document getDocument() { ohair@286: return dom; ohair@286: } ohair@286: }; ohair@286: x.setContentHandler(handler); ohair@286: ohair@286: return x; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Parses the given document and add it to the DOM forest. ohair@286: * ohair@286: * @return ohair@286: * null if there was a parse error. otherwise non-null. ohair@286: */ ohair@286: public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { ohair@286: Document dom = documentBuilder.newDocument(); ohair@286: ohair@286: systemId = Options.normalizeSystemId(systemId); ohair@286: ohair@286: // put into the map before growing a tree, to ohair@286: // prevent recursive reference from causing infinite loop. ohair@286: core.put( systemId, dom ); ohair@286: if(root) ohair@286: rootDocuments.add(systemId); ohair@286: ohair@286: try { ohair@286: XMLReader reader = parserFactory.newSAXParser().getXMLReader(); ohair@286: reader.setContentHandler(getParserHandler(dom)); ohair@286: if(errorReceiver!=null) ohair@286: reader.setErrorHandler(errorReceiver); ohair@286: if(entityResolver!=null) ohair@286: reader.setEntityResolver(entityResolver); ohair@286: reader.parse(inputSource); ohair@286: } catch( ParserConfigurationException e ) { ohair@286: // in practice, this exception won't happen. ohair@286: errorReceiver.error(e.getMessage(),e); ohair@286: core.remove(systemId); ohair@286: rootDocuments.remove(systemId); ohair@286: return null; ohair@286: } catch( IOException e ) { ohair@286: errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); ohair@286: core.remove(systemId); ohair@286: rootDocuments.remove(systemId); ohair@286: return null; ohair@286: } ohair@286: ohair@286: return dom; ohair@286: } ohair@286: ohair@286: public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { ohair@286: Document dom = documentBuilder.newDocument(); ohair@286: ohair@286: systemId = Options.normalizeSystemId(systemId); ohair@286: ohair@286: if(root) ohair@286: rootDocuments.add(systemId); ohair@286: ohair@286: if(systemId==null) ohair@286: throw new IllegalArgumentException("system id cannot be null"); ohair@286: core.put( systemId, dom ); ohair@286: ohair@286: new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); ohair@286: ohair@286: return dom; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Performs internalization. ohair@286: * ohair@286: * This method should be called only once, only after all the ohair@286: * schemas are parsed. ohair@286: * ohair@286: * @return ohair@286: * the returned bindings need to be applied after schema ohair@286: * components are built. ohair@286: */ ohair@286: public SCDBasedBindingSet transform(boolean enableSCD) { alanb@368: return Internalizer.transform(this, enableSCD, options.disableXmlSecurity); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Performs the schema correctness check by using JAXP 1.3. ohair@286: * ohair@286: *

ohair@286: * This is "weak", because {@link SchemaFactory#newSchema(Source[])} ohair@286: * doesn't handle inclusions very correctly (it ends up parsing it ohair@286: * from its original source, not in this tree), and because ohair@286: * it doesn't handle two documents for the same namespace very ohair@286: * well. ohair@286: * ohair@286: *

ohair@286: * We should eventually fix JAXP (and Xerces), but meanwhile ohair@286: * this weaker and potentially wrong correctness check is still ohair@286: * better than nothing when used inside JAX-WS (JAXB CLI and Ant ohair@286: * does a better job of checking this.) ohair@286: * ohair@286: *

ohair@286: * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. ohair@286: */ ohair@286: public void weakSchemaCorrectnessCheck(SchemaFactory sf) { ohair@286: List sources = new ArrayList(); ohair@286: for( String systemId : getRootDocuments() ) { ohair@286: Document dom = get(systemId); ohair@286: if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) ohair@286: continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error ohair@286: ohair@286: SAXSource ss = createSAXSource(systemId); ohair@286: try { ohair@286: ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); ohair@286: } catch (SAXException e) { ohair@286: throw new AssertionError(e); // Xerces wants this. See 6395322. ohair@286: } ohair@286: sources.add(ss); ohair@286: } ohair@286: ohair@286: try { mkos@408: allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0])); ohair@286: } catch (SAXException e) { ohair@286: // error should have been reported. alanb@368: } catch (RuntimeException re) { ohair@286: // JAXP RI isn't very trustworthy when it comes to schema error check, ohair@286: // and we know some cases where it just dies with NPE. So handle it gracefully. ohair@286: // this masks a bug in the JAXP RI, but we need a release that we have to make. ohair@286: try { ohair@286: sf.getErrorHandler().warning( ohair@286: new SAXParseException(Messages.format( alanb@368: Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()), alanb@368: null,null,-1,-1,re)); alanb@368: } catch (SAXException e) { ohair@286: // ignore ohair@286: } ohair@286: } ohair@286: } ohair@286: ohair@286: /** ohair@286: * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} ohair@286: * (instead of parsing the original source identified by the system ID.) ohair@286: */ ohair@286: public @NotNull SAXSource createSAXSource(String systemId) { ohair@286: ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { ohair@286: // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect ohair@286: // handlers, since SAX allows handlers to be changed while parsing. alanb@368: @Override ohair@286: public void parse(InputSource input) throws SAXException, IOException { ohair@286: createParser().parse(input, this, this, this); ohair@286: } ohair@286: alanb@368: @Override ohair@286: public void parse(String systemId) throws SAXException, IOException { ohair@286: parse(new InputSource(systemId)); ohair@286: } ohair@286: }); ohair@286: ohair@286: return new SAXSource(reader,new InputSource(systemId)); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Creates {@link XMLParser} for XSOM which reads documents from ohair@286: * this DOMForest rather than doing a fresh parse. ohair@286: * ohair@286: * The net effect is that XSOM will read transformed XML Schemas ohair@286: * instead of the original documents. ohair@286: */ ohair@286: public XMLParser createParser() { alanb@368: return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity))); ohair@286: } ohair@286: ohair@286: public EntityResolver getEntityResolver() { ohair@286: return entityResolver; ohair@286: } ohair@286: ohair@286: public void setEntityResolver(EntityResolver entityResolver) { ohair@286: this.entityResolver = entityResolver; ohair@286: } ohair@286: ohair@286: public ErrorReceiver getErrorHandler() { ohair@286: return errorReceiver; ohair@286: } ohair@286: ohair@286: public void setErrorHandler(ErrorReceiver errorHandler) { ohair@286: this.errorReceiver = errorHandler; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Gets all the parsed documents. ohair@286: */ ohair@286: public Document[] listDocuments() { ohair@286: return core.values().toArray(new Document[core.size()]); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Gets all the system IDs of the documents. ohair@286: */ ohair@286: public String[] listSystemIDs() { ohair@286: return core.keySet().toArray(new String[core.keySet().size()]); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Dumps the contents of the forest to the specified stream. ohair@286: * ohair@286: * This is a debug method. As such, error handling is sloppy. ohair@286: */ alanb@368: @SuppressWarnings("CallToThreadDumpStack") ohair@286: public void dump( OutputStream out ) throws IOException { ohair@286: try { ohair@286: // create identity transformer alanb@368: boolean disableXmlSecurity = false; alanb@368: if (options != null) { alanb@368: disableXmlSecurity = options.disableXmlSecurity; alanb@368: } alanb@368: TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity); alanb@368: Transformer it = tf.newTransformer(); ohair@286: ohair@286: for (Map.Entry e : core.entrySet()) { ohair@286: out.write( ("---<< "+e.getKey()+'\n').getBytes() ); ohair@286: ohair@286: DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); ohair@286: dw.setIndentStep(" "); ohair@286: it.transform( new DOMSource(e.getValue()), ohair@286: new SAXResult(dw)); ohair@286: ohair@286: out.write( "\n\n\n".getBytes() ); ohair@286: } ohair@286: } catch( TransformerException e ) { ohair@286: e.printStackTrace(); ohair@286: } ohair@286: } ohair@286: }