001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors. 
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.sequencer.xml;
023    
024    import java.io.InputStream;
025    import org.jboss.dna.common.text.TextDecoder;
026    import org.jboss.dna.graph.JcrNtLexicon;
027    import org.jboss.dna.graph.properties.Name;
028    import org.jboss.dna.graph.sequencers.SequencerContext;
029    import org.jboss.dna.graph.sequencers.SequencerOutput;
030    import org.jboss.dna.graph.sequencers.StreamSequencer;
031    import org.xml.sax.InputSource;
032    import org.xml.sax.SAXNotRecognizedException;
033    import org.xml.sax.SAXNotSupportedException;
034    import org.xml.sax.XMLReader;
035    import org.xml.sax.helpers.XMLReaderFactory;
036    
037    /**
038     * A sequencer for XML files, which maintains DTD, entity, comments, and other content. Note that by default the sequencer uses
039     * the {@link XmlSequencer.AttributeScoping#USE_DEFAULT_NAMESPACE default namespace} for unqualified attribute rather than
040     * {@link XmlSequencer.AttributeScoping#INHERIT_ELEMENT_NAMESPACE inheriting the namespace from the element}. (See also
041     * {@link InheritingXmlSequencer}.
042     * 
043     * @author John Verhaeg
044     */
045    public class XmlSequencer implements StreamSequencer {
046    
047        /**
048         * The choices for how attributes that have no namespace prefix should be assigned a namespace.
049         * 
050         * @author Randall Hauch
051         */
052        public enum AttributeScoping {
053            /** The attribute's namespace is the default namespace */
054            USE_DEFAULT_NAMESPACE,
055            /** The attribute's namespace is the same namespace as the containing element */
056            INHERIT_ELEMENT_NAMESPACE;
057        }
058    
059        /*package*/static final String DEFAULT_PRIMARY_TYPE = "nt:unstructured";
060        /*package*/static final String DECL_HANDLER_FEATURE = "http://xml.org/sax/properties/declaration-handler";
061        /*package*/static final String ENTITY_RESOLVER_2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";
062        /*package*/static final String LEXICAL_HANDLER_FEATURE = "http://xml.org/sax/properties/lexical-handler";
063        /*package*/static final String RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";
064        /*package*/static final String LOAD_EXTERNAL_DTDS_FEATURE = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
065    
066        private AttributeScoping scoping = AttributeScoping.USE_DEFAULT_NAMESPACE;
067    
068        /**
069         * @param scoping Sets scoping to the specified value.
070         */
071        public void setAttributeScoping( AttributeScoping scoping ) {
072            this.scoping = scoping;
073        }
074    
075        /**
076         * @return scoping
077         */
078        public AttributeScoping getAttributeScoping() {
079            return scoping;
080        }
081    
082        /**
083         * {@inheritDoc}
084         * 
085         * @see org.jboss.dna.graph.sequencers.StreamSequencer#sequence(InputStream, SequencerOutput, SequencerContext)
086         */
087        public void sequence( InputStream stream,
088                              SequencerOutput output,
089                              SequencerContext context ) {
090            XMLReader reader;
091            try {
092                // Set up the XML handler ...
093                Name primaryType = JcrNtLexicon.UNSTRUCTURED;
094                Name nameAttribute = null;
095                TextDecoder decoder = null;
096                XmlSequencerHandler handler = new XmlSequencerHandler(output, context, nameAttribute, primaryType, decoder, scoping);
097                // Create the reader ...
098                reader = XMLReaderFactory.createXMLReader();
099                reader.setContentHandler(handler);
100                reader.setErrorHandler(handler);
101                // Ensure handler acting as entity resolver 2
102                reader.setProperty(DECL_HANDLER_FEATURE, handler);
103                // Ensure handler acting as lexical handler
104                reader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
105                // Ensure handler acting as entity resolver 2
106                setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true);
107                // Prevent loading of external DTDs
108                setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false);
109                // Prevent the resolving of DTD entities into fully-qualified URIS
110                setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false);
111                // Parse XML document
112                reader.parse(new InputSource(stream));
113            } catch (Exception error) {
114                context.getLogger(getClass()).error(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
115                context.getProblems().addError(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
116            }
117        }
118    
119        /**
120         * Sets the reader's named feature to the supplied value, only if the feature is not already set to that value. This method
121         * does nothing if the feature is not known to the reader.
122         * 
123         * @param reader the reader; may not be null
124         * @param featureName the name of the feature; may not be null
125         * @param value the value for the feature
126         */
127        /*package*/static void setFeature( XMLReader reader,
128                                            String featureName,
129                                            boolean value ) {
130            try {
131                if (reader.getFeature(featureName) != value) {
132                    reader.setFeature(featureName, value);
133                }
134            } catch (SAXNotRecognizedException meansFeatureNotRecognized) {
135            } catch (SAXNotSupportedException meansFeatureNotSupported) {
136            }
137        }
138    
139    }