001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors. 
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.graph.xml;
023    
024    import java.util.ArrayList;
025    import java.util.HashMap;
026    import java.util.LinkedList;
027    import java.util.List;
028    import java.util.Map;
029    import javax.xml.parsers.SAXParser;
030    import net.jcip.annotations.NotThreadSafe;
031    import org.jboss.dna.common.text.TextDecoder;
032    import org.jboss.dna.common.text.XmlNameEncoder;
033    import org.jboss.dna.common.util.CheckArg;
034    import org.jboss.dna.graph.BasicExecutionContext;
035    import org.jboss.dna.graph.ExecutionContext;
036    import org.jboss.dna.graph.properties.Name;
037    import org.jboss.dna.graph.properties.NameFactory;
038    import org.jboss.dna.graph.properties.NamespaceRegistry;
039    import org.jboss.dna.graph.properties.Path;
040    import org.jboss.dna.graph.properties.PathFactory;
041    import org.jboss.dna.graph.properties.Property;
042    import org.jboss.dna.graph.properties.PropertyFactory;
043    import org.jboss.dna.graph.properties.basic.LocalNamespaceRegistry;
044    import org.xml.sax.Attributes;
045    import org.xml.sax.ext.DefaultHandler2;
046    
047    /**
048     * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the
049     * supplied graph. This implementation ignores DTD entities, XML comments, and XML processing instructions. If other
050     * behavior is required, the appropriate methods can be overridden. (This is why this class extends <code>DefaultHandler2</code>,
051     * which has support for processing all the different parts of XML.)
052     * <p>
053     * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler)
054     * parse(..,DefaultHandler)} methods.
055     * </p>
056     * 
057     * @author Randall Hauch
058     */
059    @NotThreadSafe
060    public class XmlHandler extends DefaultHandler2 {
061    
062        /**
063         * The choices for how attributes that have no namespace prefix should be assigned a namespace.
064         * 
065         * @author Randall Hauch
066         */
067        public enum AttributeScoping {
068            /** The attribute's namespace is the default namespace */
069            USE_DEFAULT_NAMESPACE,
070            /** The attribute's namespace is the same namespace as the containing element */
071            INHERIT_ELEMENT_NAMESPACE;
072        }
073    
074        private final ExecutionContext context;
075    
076        /**
077         * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
078         * characters.
079         */
080        public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
081    
082        /**
083         * The default {@link AttributeScoping}.
084         */
085        public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;
086    
087        /**
088         * The destination where the content should be sent.
089         */
090        protected final Destination destination;
091    
092        /**
093         * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
094         */
095        protected final Name nameAttribute;
096    
097        /**
098         * The name of the property that is to be set with the type of the XML element. For example, "jcr:primaryType".
099         */
100        protected final Name typeAttribute;
101    
102        /**
103         * The value of the node type property, if the node's name is set with the {@link #nameAttribute}.
104         */
105        protected final Name typeAttributeValue;
106    
107        /**
108         * The cached reference to the graph's path factory.
109         */
110        protected final PathFactory pathFactory;
111    
112        /**
113         * The cached reference to the graph's name factory.
114         */
115        protected final NameFactory nameFactory;
116    
117        /**
118         * The cached reference to the graph's property factory.
119         */
120        protected final PropertyFactory propertyFactory;
121    
122        /**
123         * The cached reference to the graph's namespace registry.
124         */
125        protected final NamespaceRegistry namespaceRegistry;
126    
127        /**
128         * The TextDecoder that is used to decode the names.
129         */
130        protected final TextDecoder decoder;
131    
132        /**
133         * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
134         * sync with the namespaces in the XML document.
135         */
136        private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
137    
138        private final AttributeScoping attributeScoping;
139    
140        /**
141         * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
142         * is shorter than that initial path.
143         */
144        protected Path currentPath;
145    
146        /**
147         * Flag that records whether the first element should be skipped.
148         */
149        protected boolean skipFirstElement;
150    
151        /**
152         * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node.
153         */
154        protected final List<Property> properties = new ArrayList<Property>();
155    
156        /**
157         * A working array that contains a single value object that is used to create Property objects (without having to create an
158         * array of values for each property).
159         */
160        protected final Object[] propertyValues = new Object[1];
161    
162        /**
163         * Create a handler that creates content in the supplied graph
164         * 
165         * @param destination the destination where the content should be sent.graph in which the content should be placed
166         * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be
167         *        converted to the top-level node of the content
168         * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed
169         * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to
170         *        using those values to create names; or null if the default encoder should be used
171         * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
172         *        "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
173         * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
174         *        such property
175         * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>,
176         *        or null if the value should be set to the type of the XML element
177         * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
178         *        {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used
179         * @throws IllegalArgumentException if the destination reference is null
180         */
181        public XmlHandler( Destination destination,
182                           boolean skipRootElement,
183                           Path parent,
184                           TextDecoder textDecoder,
185                           Name nameAttribute,
186                           Name typeAttribute,
187                           Name typeAttributeValue,
188                           AttributeScoping scoping ) {
189            CheckArg.isNotNull(destination, "destination");
190            assert destination != null;
191            this.destination = destination;
192            this.nameAttribute = nameAttribute;
193            this.typeAttribute = typeAttribute;
194            this.typeAttributeValue = typeAttributeValue;
195            this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
196            this.skipFirstElement = skipRootElement;
197            this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
198    
199            // Use the execution context ...
200            this.context = destination.getExecutionContext();
201            assert this.context != null;
202    
203            // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
204            NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
205            final ExecutionContext localContext = new BasicExecutionContext(this.context, namespaceRegistry);
206    
207            // Set up references to frequently-used objects in the context ...
208            this.nameFactory = localContext.getValueFactories().getNameFactory();
209            this.pathFactory = localContext.getValueFactories().getPathFactory();
210            this.propertyFactory = localContext.getPropertyFactory();
211            this.namespaceRegistry = localContext.getNamespaceRegistry();
212            assert this.nameFactory != null;
213            assert this.pathFactory != null;
214            assert this.propertyFactory != null;
215            assert this.namespaceRegistry != null;
216    
217            // Set up the initial path ...
218            this.currentPath = parent != null ? parent : this.pathFactory.createRootPath();
219            assert this.currentPath != null;
220        }
221    
222        /**
223         * {@inheritDoc}
224         * <p>
225         * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
226         * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
227         * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
228         * </p>
229         * 
230         * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
231         */
232        @Override
233        public void startPrefixMapping( String prefix,
234                                        String uri ) {
235            assert uri != null;
236            // Add the prefix to the stack ...
237            LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
238            if (prefixStack == null) {
239                prefixStack = new LinkedList<String>();
240                this.prefixStackByUri.put(uri, prefixStack);
241            }
242            prefixStack.addFirst(prefix);
243    
244            // If the namespace is already registered, then we'll have to register it in the context's registry, too.
245            if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
246                // The namespace is not already registered (locally or in the context's registry), so we have to
247                // register it with the context's registry (which the local register then inherits).
248                NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
249                if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
250                    // The prefix is already bound, so register and generate a unique prefix
251                    context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
252                    // Now register locally with the supplied prefix ...
253                    namespaceRegistry.register(prefix, uri);
254                } else {
255                    context.getNamespaceRegistry().register(prefix, uri);
256                }
257            } else {
258                // It is already registered, but re-register it locally using the supplied prefix ...
259                namespaceRegistry.register(prefix, uri);
260            }
261        }
262    
263        /**
264         * {@inheritDoc}
265         * 
266         * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
267         */
268        @Override
269        public void endPrefixMapping( String prefix ) {
270            assert prefix != null;
271            // Get the current URI for this prefix ...
272            String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
273            assert uri != null;
274    
275            // Get the previous prefix from the stack ...
276            LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
277            assert prefixStack != null;
278            assert !prefixStack.isEmpty();
279            String existingPrefix = prefixStack.removeFirst();
280            assert prefix.equals(existingPrefix);
281    
282            // If there are no previous prefixes, then remove the mapping ...
283            if (prefixStack.isEmpty()) {
284                namespaceRegistry.unregister(uri);
285                prefixStackByUri.remove(uri);
286            } else {
287                String previous = prefixStack.getFirst();
288                namespaceRegistry.register(previous, uri);
289            }
290        }
291    
292        /**
293         * {@inheritDoc}
294         * 
295         * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
296         *      org.xml.sax.Attributes)
297         */
298        @Override
299        public void startElement( String uri,
300                                  String localName,
301                                  String name,
302                                  Attributes attributes ) {
303            // Should this (root) element be skipped?
304            if (skipFirstElement) {
305                skipFirstElement = false;
306                return;
307            }
308            assert localName != null;
309            Name nodeName = null;
310    
311            properties.clear();
312            Object typePropertyValue = null;
313            // Convert each of the attributes to a property ...
314            for (int i = 0, len = attributes.getLength(); i != len; ++i) {
315                String attributeLocalName = attributes.getLocalName(i);
316                String attributeUri = attributes.getURI(i);
317                Name attributeName = null;
318                if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
319                    switch (this.attributeScoping) {
320                        case INHERIT_ELEMENT_NAMESPACE:
321                            attributeName = nameFactory.create(uri, attributeLocalName, decoder);
322                            break;
323                        case USE_DEFAULT_NAMESPACE:
324                            attributeName = nameFactory.create(attributeLocalName, decoder);
325                            break;
326                    }
327                } else {
328                    attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
329                }
330                assert attributeName != null;
331                // Check to see if this is an attribute that represents the node name (which may be null) ...
332                if (nodeName == null && attributeName.equals(nameAttribute)) {
333                    nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder
334                    continue;
335                }
336                if (typePropertyValue == null && attributeName.equals(typeAttribute)) {
337                    typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder
338                    continue;
339                }
340                // Create a property for this attribute ...
341                Property property = createProperty(attributeName, attributes.getValue(i));
342                properties.add(property);
343            }
344            // Create the node name if required ...
345            if (nodeName == null) {
346                // No attribute defines the node name ...
347                nodeName = nameFactory.create(uri, localName, decoder);
348            } else {
349                typePropertyValue = nameFactory.create(uri, localName, decoder);
350            }
351            if (typeAttribute != null) {
352                // A attribute defines the node name. Set the type property, if required
353                if (typePropertyValue == null) typePropertyValue = typeAttributeValue;
354                if (typePropertyValue != null) {
355                    propertyValues[0] = typePropertyValue;
356                    Property property = propertyFactory.create(typeAttribute, propertyValues);
357                    properties.add(property);
358                }
359            }
360            // Update the current path ...
361            currentPath = pathFactory.create(currentPath, nodeName);
362            // Create the node, and note that we don't care about same-name siblings (as the graph will correct them) ...
363            destination.create(currentPath, properties);
364        }
365    
366        /**
367         * {@inheritDoc}
368         * 
369         * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
370         */
371        @Override
372        public void endElement( String uri,
373                                String localName,
374                                String name ) {
375            // Nothing to do but to change the current path to be the parent ...
376            currentPath = currentPath.getParent();
377        }
378    
379        /**
380         * {@inheritDoc}
381         * 
382         * @see org.xml.sax.helpers.DefaultHandler#endDocument()
383         */
384        @Override
385        public void endDocument() {
386            // Submit any outstanding requests (if there are any) ...
387            destination.submit();
388        }
389    
390        /**
391         * Create a property with the given name and value, obtained from an attribute name and value in the XML content.
392         * <p>
393         * By default, this method creates a property by directly using the value as the sole value of the property.
394         * </p>
395         * 
396         * @param propertyName the name of the property; never null
397         * @param value the attribute value
398         * @return the property; may not be null
399         */
400        protected Property createProperty( Name propertyName,
401                                           Object value ) {
402            propertyValues[0] = value;
403            Property result = propertyFactory.create(propertyName, propertyValues);
404            return result;
405        }
406    
407        /**
408         * Interface used internally as the destination for the requests. This is used to abstract whether the requests should be
409         * submitted immediately or in a single batch.
410         * 
411         * @author Randall Hauch
412         */
413        @NotThreadSafe
414        public static interface Destination {
415    
416            /**
417             * Obtain the execution context of the destination.
418             * 
419             * @return the destination's execution context
420             */
421            public ExecutionContext getExecutionContext();
422    
423            /**
424             * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
425             * 
426             * @param path the absolute path of the node
427             * @param properties the properties for the node; never null, but may be empty if there are no properties
428             */
429            public void create( Path path,
430                                List<Property> properties );
431    
432            /**
433             * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
434             * 
435             * @param path the absolute path of the node
436             * @param firstProperty the first property
437             * @param additionalProperties the remaining properties for the node
438             */
439            public void create( Path path,
440                                Property firstProperty,
441                                Property... additionalProperties );
442    
443            /**
444             * Signal to this destination that any enqueued create requests should be submitted. Usually this happens at the end of
445             * the document parsing, but an implementer must allow for it to be called multiple times and anytime during parsing.
446             */
447            public void submit();
448        }
449    }