001    /*
002     * JBoss DNA (http://www.jboss.org/dna)
003     * See the COPYRIGHT.txt file distributed with this work for information
004     * regarding copyright ownership.  Some portions may be licensed
005     * to Red Hat, Inc. under one or more contributor license agreements.
006     * See the AUTHORS.txt file in the distribution for a full listing of 
007     * individual contributors. 
008     *
009     * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010     * is licensed to you under the terms of the GNU Lesser General Public License as
011     * published by the Free Software Foundation; either version 2.1 of
012     * the License, or (at your option) any later version.
013     *
014     * JBoss DNA is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017     * Lesser General Public License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this software; if not, write to the Free
021     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023     */
024    package org.jboss.dna.sequencer.xml;
025    
026    import java.io.InputStream;
027    import org.jboss.dna.common.text.TextDecoder;
028    import org.jboss.dna.graph.JcrNtLexicon;
029    import org.jboss.dna.graph.property.Name;
030    import org.jboss.dna.graph.sequencer.SequencerContext;
031    import org.jboss.dna.graph.sequencer.SequencerOutput;
032    import org.jboss.dna.graph.sequencer.StreamSequencer;
033    import org.xml.sax.InputSource;
034    import org.xml.sax.SAXNotRecognizedException;
035    import org.xml.sax.SAXNotSupportedException;
036    import org.xml.sax.XMLReader;
037    import org.xml.sax.helpers.XMLReaderFactory;
038    
039    /**
040     * A sequencer for XML files, which maintains DTD, entity, comments, and other content. Note that by default the sequencer uses
041     * the {@link XmlSequencer.AttributeScoping#USE_DEFAULT_NAMESPACE default namespace} for unqualified attribute rather than
042     * {@link XmlSequencer.AttributeScoping#INHERIT_ELEMENT_NAMESPACE inheriting the namespace from the element}. (See also
043     * {@link InheritingXmlSequencer}.
044     * 
045     * @author John Verhaeg
046     */
047    public class XmlSequencer implements StreamSequencer {
048    
049        /**
050         * The choices for how attributes that have no namespace prefix should be assigned a namespace.
051         * 
052         * @author Randall Hauch
053         */
054        public enum AttributeScoping {
055            /** The attribute's namespace is the default namespace */
056            USE_DEFAULT_NAMESPACE,
057            /** The attribute's namespace is the same namespace as the containing element */
058            INHERIT_ELEMENT_NAMESPACE;
059        }
060    
061        /*package*/static final String DEFAULT_PRIMARY_TYPE = "nt:unstructured";
062        /*package*/static final String DECL_HANDLER_FEATURE = "http://xml.org/sax/properties/declaration-handler";
063        /*package*/static final String ENTITY_RESOLVER_2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";
064        /*package*/static final String LEXICAL_HANDLER_FEATURE = "http://xml.org/sax/properties/lexical-handler";
065        /*package*/static final String RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";
066        /*package*/static final String LOAD_EXTERNAL_DTDS_FEATURE = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
067    
068        private AttributeScoping scoping = AttributeScoping.USE_DEFAULT_NAMESPACE;
069    
070        /**
071         * @param scoping Sets scoping to the specified value.
072         */
073        public void setAttributeScoping( AttributeScoping scoping ) {
074            this.scoping = scoping;
075        }
076    
077        /**
078         * @return scoping
079         */
080        public AttributeScoping getAttributeScoping() {
081            return scoping;
082        }
083    
084        /**
085         * {@inheritDoc}
086         * 
087         * @see org.jboss.dna.graph.sequencer.StreamSequencer#sequence(InputStream, SequencerOutput, SequencerContext)
088         */
089        public void sequence( InputStream stream,
090                              SequencerOutput output,
091                              SequencerContext context ) {
092            XMLReader reader;
093            try {
094                // Set up the XML handler ...
095                Name primaryType = JcrNtLexicon.UNSTRUCTURED;
096                Name nameAttribute = null;
097                TextDecoder decoder = null;
098                XmlSequencerHandler handler = new XmlSequencerHandler(output, context, nameAttribute, primaryType, decoder, scoping);
099                // Create the reader ...
100                reader = XMLReaderFactory.createXMLReader();
101                reader.setContentHandler(handler);
102                reader.setErrorHandler(handler);
103                // Ensure handler acting as entity resolver 2
104                reader.setProperty(DECL_HANDLER_FEATURE, handler);
105                // Ensure handler acting as lexical handler
106                reader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
107                // Ensure handler acting as entity resolver 2
108                setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true);
109                // Prevent loading of external DTDs
110                setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false);
111                // Prevent the resolving of DTD entities into fully-qualified URIS
112                setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false);
113                // Parse XML document
114                reader.parse(new InputSource(stream));
115            } catch (Exception error) {
116                context.getLogger(getClass()).error(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
117                context.getProblems().addError(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
118            }
119        }
120    
121        /**
122         * Sets the reader's named feature to the supplied value, only if the feature is not already set to that value. This method
123         * does nothing if the feature is not known to the reader.
124         * 
125         * @param reader the reader; may not be null
126         * @param featureName the name of the feature; may not be null
127         * @param value the value for the feature
128         */
129        /*package*/static void setFeature( XMLReader reader,
130                                            String featureName,
131                                            boolean value ) {
132            try {
133                if (reader.getFeature(featureName) != value) {
134                    reader.setFeature(featureName, value);
135                }
136            } catch (SAXNotRecognizedException meansFeatureNotRecognized) {
137            } catch (SAXNotSupportedException meansFeatureNotSupported) {
138            }
139        }
140    
141    }