001    /*
002     * JBoss DNA (http://www.jboss.org/dna)
003     * See the COPYRIGHT.txt file distributed with this work for information
004     * regarding copyright ownership.  Some portions may be licensed
005     * to Red Hat, Inc. under one or more contributor license agreements.
006     * See the AUTHORS.txt file in the distribution for a full listing of 
007     * individual contributors. 
008     *
009     * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010     * is licensed to you under the terms of the GNU Lesser General Public License as
011     * published by the Free Software Foundation; either version 2.1 of
012     * the License, or (at your option) any later version.
013     *
014     * JBoss DNA is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017     * Lesser General Public License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this software; if not, write to the Free
021     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023     */
024    package org.jboss.dna.graph.xml;
025    
026    import java.util.ArrayList;
027    import java.util.Arrays;
028    import java.util.Collection;
029    import java.util.Collections;
030    import java.util.HashMap;
031    import java.util.LinkedList;
032    import java.util.List;
033    import java.util.Map;
034    import javax.xml.parsers.SAXParser;
035    import net.jcip.annotations.NotThreadSafe;
036    import org.jboss.dna.common.text.TextDecoder;
037    import org.jboss.dna.common.text.XmlNameEncoder;
038    import org.jboss.dna.common.util.CheckArg;
039    import org.jboss.dna.graph.ExecutionContext;
040    import org.jboss.dna.graph.io.Destination;
041    import org.jboss.dna.graph.property.Name;
042    import org.jboss.dna.graph.property.NameFactory;
043    import org.jboss.dna.graph.property.NamespaceRegistry;
044    import org.jboss.dna.graph.property.Path;
045    import org.jboss.dna.graph.property.PathFactory;
046    import org.jboss.dna.graph.property.Property;
047    import org.jboss.dna.graph.property.PropertyFactory;
048    import org.jboss.dna.graph.property.basic.LocalNamespaceRegistry;
049    import org.xml.sax.Attributes;
050    import org.xml.sax.ext.DefaultHandler2;
051    import com.google.common.collect.LinkedHashMultimap;
052    import com.google.common.collect.Multimap;
053    
054    /**
055     * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the
056     * supplied graph. This implementation ignores DTD entities, XML contents, and other XML processing instructions. If other
057     * behavior is required, the appropriate methods can be overridden. (Which is why this class extends <code>DefaultHandler2</code>,
058     * which has support for processing all the different parts of XML.
059     * <p>
060     * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler)
061     * parse(..,DefaultHandler)} methods.
062     * </p>
063     * 
064     * @author Randall Hauch
065     */
066    @NotThreadSafe
067    public class XmlHandler extends DefaultHandler2 {
068    
069        /**
070         * The choices for how attributes that have no namespace prefix should be assigned a namespace.
071         * 
072         * @author Randall Hauch
073         */
074        public enum AttributeScoping {
075            /** The attribute's namespace is the default namespace */
076            USE_DEFAULT_NAMESPACE,
077            /** The attribute's namespace is the same namespace as the containing element */
078            INHERIT_ELEMENT_NAMESPACE;
079        }
080    
081        private final ExecutionContext context;
082    
083        /**
084         * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
085         * characters.
086         */
087        public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
088    
089        /**
090         * The default {@link AttributeScoping}.
091         */
092        public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;
093    
094        /**
095         * The destination where the content should be sent.
096         */
097        protected final Destination destination;
098    
099        /**
100         * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
101         */
102        protected final Name nameAttribute;
103    
104        /**
105         * The name of the property that is to be set with the type of the XML element. For example, "jcr:name".
106         */
107        protected final Name typeAttribute;
108    
109        /**
110         * The value of the node type property, if the node's name is set with the {@link #nameAttribute}.
111         */
112        protected final Name typeAttributeValue;
113    
114        /**
115         * The cached reference to the graph's path factory.
116         */
117        protected final PathFactory pathFactory;
118    
119        /**
120         * The cached reference to the graph's name factory.
121         */
122        protected final NameFactory nameFactory;
123    
124        /**
125         * The cached reference to the graph's property factory.
126         */
127        protected final PropertyFactory propertyFactory;
128    
129        /**
130         * The cached reference to the graph's namespace registry.
131         */
132        protected final NamespaceRegistry namespaceRegistry;
133    
134        /**
135         * The TextDecoder that is used to decode the names.
136         */
137        protected final TextDecoder decoder;
138    
139        /**
140         * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
141         * sync with the namespaces in the XML document.
142         */
143        private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
144    
145        private final AttributeScoping attributeScoping;
146    
147        /**
148         * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
149         * is shorter than that initial path.
150         */
151        protected Path currentPath;
152    
153        /**
154         * Flag the records whether the first element should be skipped.
155         */
156        protected boolean skipFirstElement;
157    
158        /**
159         * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node.
160         */
161        protected final List<Property> properties = new ArrayList<Property>();
162    
163        /**
164         * A working array that contains a single value object that is used to create Property objects (without having to create an
165         * array of values for each property).
166         */
167        protected final Object[] propertyValues = new Object[1];
168    
169        /**
170         * Character buffer to aggregate nested character data
171         * 
172         * @see ElementEntry
173         */
174        private StringBuilder characterDataBuffer = new StringBuilder();
175    
176        /**
177         * Stack of pending {@link ElementEntry element entries} from the root of the imported content to the current node.
178         * 
179         * @see ElementEntry
180         */
181        private final LinkedList<ElementEntry> elementStack = new LinkedList<ElementEntry>();
182    
183        /**
184         * Create a handler that creates content in the supplied graph
185         * 
186         * @param destination the destination where the content should be sent.graph in which the content should be placed
187         * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be
188         *        converted to the top-level node of the content
189         * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed
190         * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to
191         *        using those values to create names; or null if the default encoder should be used
192         * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
193         *        "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
194         * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
195         *        such property
196         * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>,
197         *        or null if the value should be set to the type of the XML element
198         * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
199         *        {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used
200         * @throws IllegalArgumentException if the destination reference is null
201         */
202        public XmlHandler( Destination destination,
203                           boolean skipRootElement,
204                           Path parent,
205                           TextDecoder textDecoder,
206                           Name nameAttribute,
207                           Name typeAttribute,
208                           Name typeAttributeValue,
209                           AttributeScoping scoping ) {
210            CheckArg.isNotNull(destination, "destination");
211            assert destination != null;
212            this.destination = destination;
213            this.nameAttribute = nameAttribute;
214            this.typeAttribute = typeAttribute;
215            this.typeAttributeValue = typeAttributeValue;
216            this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
217            this.skipFirstElement = skipRootElement;
218            this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
219    
220            // Use the execution context ...
221            this.context = destination.getExecutionContext();
222            assert this.context != null;
223    
224            // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
225            NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
226            final ExecutionContext localContext = this.context.with(namespaceRegistry);
227    
228            // Set up references to frequently-used objects in the context ...
229            this.nameFactory = localContext.getValueFactories().getNameFactory();
230            this.pathFactory = localContext.getValueFactories().getPathFactory();
231            this.propertyFactory = localContext.getPropertyFactory();
232            this.namespaceRegistry = localContext.getNamespaceRegistry();
233            assert this.nameFactory != null;
234            assert this.pathFactory != null;
235            assert this.propertyFactory != null;
236            assert this.namespaceRegistry != null;
237    
238            // Set up the initial path ...
239            this.currentPath = parent != null ? parent : this.pathFactory.createRootPath();
240            assert this.currentPath != null;
241        }
242    
243        /**
244         * {@inheritDoc}
245         * <p>
246         * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
247         * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
248         * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
249         * </p>
250         * 
251         * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
252         */
253        @Override
254        public void startPrefixMapping( String prefix,
255                                        String uri ) {
256            assert uri != null;
257            // Add the prefix to the stack ...
258            LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
259            if (prefixStack == null) {
260                prefixStack = new LinkedList<String>();
261                this.prefixStackByUri.put(uri, prefixStack);
262            }
263            prefixStack.addFirst(prefix);
264    
265            // If the namespace is already registered, then we'll have to register it in the context's registry, too.
266            if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
267                // The namespace is not already registered (locally or in the context's registry), so we have to
268                // register it with the context's registry (which the local register then inherits).
269                NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
270                if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
271                    // The prefix is already bound, so register and generate a unique prefix
272                    context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
273                    // Now register locally with the supplied prefix ...
274                    namespaceRegistry.register(prefix, uri);
275                } else {
276                    context.getNamespaceRegistry().register(prefix, uri);
277                }
278            } else {
279                // It is already registered, but re-register it locally using the supplied prefix ...
280                namespaceRegistry.register(prefix, uri);
281            }
282        }
283    
284        /**
285         * {@inheritDoc}
286         * 
287         * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
288         */
289        @Override
290        public void endPrefixMapping( String prefix ) {
291            assert prefix != null;
292            // Get the current URI for this prefix ...
293            String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
294            assert uri != null;
295    
296            // Get the previous prefix from the stack ...
297            LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
298            assert prefixStack != null;
299            assert !prefixStack.isEmpty();
300            String existingPrefix = prefixStack.removeFirst();
301            assert prefix.equals(existingPrefix);
302    
303            // If there are no previous prefixes, then remove the mapping ...
304            if (prefixStack.isEmpty()) {
305                namespaceRegistry.unregister(uri);
306                prefixStackByUri.remove(uri);
307            } else {
308                String previous = prefixStack.getFirst();
309                namespaceRegistry.register(previous, uri);
310            }
311        }
312    
313        /**
314         * {@inheritDoc}
315         * 
316         * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
317         *      org.xml.sax.Attributes)
318         */
319        @Override
320        public void startElement( String uri,
321                                  String localName,
322                                  String name,
323                                  Attributes attributes ) {
324            // Should this (root) element be skipped?
325            if (skipFirstElement) {
326                skipFirstElement = false;
327                return;
328            }
329            assert localName != null;
330            Name nodeName = null;
331    
332            ElementEntry element;
333            if (!elementStack.isEmpty()) {
334                // Add the parent
335                elementStack.peek().addAsNode();
336                element = new ElementEntry(elementStack.peek(), currentPath, null);
337            } else {
338                element = new ElementEntry(null, currentPath, null);
339            }
340            elementStack.addFirst(element);
341    
342            properties.clear();
343            Object typePropertyValue = null;
344            // Convert each of the attributes to a property ...
345            for (int i = 0, len = attributes.getLength(); i != len; ++i) {
346                String attributeLocalName = attributes.getLocalName(i);
347                String attributeUri = attributes.getURI(i);
348                Name attributeName = null;
349                if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
350                    switch (this.attributeScoping) {
351                        case INHERIT_ELEMENT_NAMESPACE:
352                            attributeName = nameFactory.create(uri, attributeLocalName, decoder);
353                            break;
354                        case USE_DEFAULT_NAMESPACE:
355                            attributeName = nameFactory.create(attributeLocalName, decoder);
356                            break;
357                    }
358                } else {
359                    attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
360                }
361                assert attributeName != null;
362                // Check to see if this is an attribute that represents the node name (which may be null) ...
363                if (nodeName == null && attributeName.equals(nameAttribute)) {
364                    nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder
365                    element.setName(nodeName);
366                    continue;
367                }
368                if (typePropertyValue == null && attributeName.equals(typeAttribute)) {
369                    typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder
370                    continue;
371                }
372                // Create a property for this attribute ...
373                element.addProperty(attributeName, attributes.getValue(i));
374            }
375            // Create the node name if required ...
376            if (nodeName == null) {
377                // No attribute defines the node name ...
378                nodeName = nameFactory.create(uri, localName, decoder);
379                element.setName(nodeName);
380            } else {
381                if (typePropertyValue == null) typePropertyValue = nameFactory.create(uri, localName, decoder);
382            }
383            if (typeAttribute != null) {
384                // A attribute defines the node name. Set the type property, if required
385                if (typePropertyValue == null) typePropertyValue = typeAttributeValue;
386                if (typePropertyValue != null) {
387                    element.addProperty(typeAttribute, typePropertyValue);
388                }
389            }
390    
391            // Update the current path ...
392            currentPath = element.path();
393        }
394    
395        /**
396         * {@inheritDoc}
397         * 
398         * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
399         */
400        @Override
401        public void endElement( String uri,
402                                String localName,
403                                String name ) {
404    
405            String s = characterDataBuffer.toString().trim();
406            if (s.length() > 0) {
407                elementStack.removeFirst().addAsPropertySetTo(s);
408            } else if (!elementStack.isEmpty()) {
409                elementStack.removeFirst().submit();
410            }
411            characterDataBuffer = new StringBuilder();
412    
413            // Nothing to do but to change the current path to be the parent ...
414            currentPath = currentPath.getParent();
415        }
416    
417        /**
418         * {@inheritDoc}
419         * 
420         * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
421         */
422        @Override
423        public void characters( char[] ch,
424                                int start,
425                                int length ) {
426            // Have to add this to a buffer as one logical set of character data can cause this method to fire multiple times
427            characterDataBuffer.append(ch, start, length);
428        }
429    
430        /**
431         * {@inheritDoc}
432         * 
433         * @see org.xml.sax.helpers.DefaultHandler#endDocument()
434         */
435        @Override
436        public void endDocument() {
437            // Submit any outstanding requests (if there are any) ...
438            destination.submit();
439        }
440    
441        /**
442         * Create a property with the given name and value, obtained from an attribute name and value in the XML content.
443         * <p>
444         * By default, this method creates a property by directly using the value as the sole value of the property.
445         * </p>
446         * 
447         * @param propertyName the name of the property; never null
448         * @param value the attribute value
449         * @return the property; may not be null
450         */
451        protected Property createProperty( Name propertyName,
452                                           Object value ) {
453            propertyValues[0] = value;
454            return propertyFactory.create(propertyName, propertyValues);
455        }
456    
457        /**
458         * Create a property with the given name and values, obtained from an attribute name and value in the XML content.
459         * <p>
460         * By default, this method creates a property by directly using the values as the values of the property.
461         * </p>
462         * 
463         * @param propertyName the name of the property; never null
464         * @param values the attribute values
465         * @return the property; may not be null
466         */
467        protected Property createProperty( Name propertyName,
468                                           Collection<Object> values ) {
469            return propertyFactory.create(propertyName, values);
470        }
471    
472        /**
473         * Possible states for an {@link ElementEntry} instance. All element entries start in state {@code TBD} and then transition to
474         * one of the terminating states, {@code NODE} or {@code PROPERTY} when {@link ElementEntry#addAsNode()} or
475         * {@link ElementEntry#addAsPropertySetTo(Object)} is invoked.
476         */
477        protected enum ElementEntryState {
478            NODE,
479            PROPERTY,
480            TBD
481        }
482    
483        /**
484         * Element entries hold references to the data of "pending" elements. "Pending" elements are elements which have been
485         * encountered through a {@link XmlHandler#startElement(String, String, String, Attributes)} event but have not yet been fully
486         * committed to the {@link XmlHandler#destination}.
487         * <p>
488         * As the current import semantics allow elements with nested character data to be imported as properties, it is not always
489         * possible to determine whether the element represents a node or a property from within the {@code startElement} method.
490         * Therefore, {@code ElementEntries} are initially created in an {@link ElementEntryState#TBD unknown state} and submitted to
491         * the {@code destination} when it can be positively determined that the entry represents a property (if nested character data
492         * is encountered) or a node (if a child node is detected or the {@link XmlHandler#endElement(String, String, String)} method
493         * is invoked prior to encountering nested character data).
494         * </p>
495         * <p>
496         * As DNA does not currently support a way to add a value to an existing property through the Graph API, {@code
497         * ElementEntries} also contain a {@link Multimap} of property names to values. The node's properties are aggregated and only
498         * submitted to the {@code destination} when the {@link XmlHandler#endElement(String, String, String)} event fires.
499         * </p>
500         */
501        private class ElementEntry {
502    
503            private ElementEntry parent;
504            // Stored separately since the root node has no parent ElementEntry but does have a path
505            private Path pathToParent;
506            private Path pathToThisNode;
507            private Name name;
508            private Multimap<Name, Object> properties;
509            private ElementEntryState state;
510    
511            protected ElementEntry( ElementEntry parent,
512                                    Path pathToParent,
513                                    Name name ) {
514                this.parent = parent;
515                this.pathToParent = pathToParent;
516                this.name = name;
517                this.state = ElementEntryState.TBD;
518                properties = new LinkedHashMultimap<Name, Object>();
519            }
520    
521            protected void setName( Name name ) {
522                this.name = name;
523                pathToThisNode = pathFactory.create(pathToParent, name);
524            }
525    
526            protected void addProperty( Name propertyName,
527                                        Object propertyValue ) {
528                assert state != ElementEntryState.PROPERTY;
529                properties.put(propertyName, propertyValue);
530            }
531    
532            protected void addAsNode() {
533                assert state != ElementEntryState.PROPERTY;
534                if (state == ElementEntryState.NODE) return;
535    
536                state = ElementEntryState.NODE;
537                destination.create(pathFactory.create(pathToParent, name), Collections.<Property>emptyList());
538            }
539    
540            protected void addAsPropertySetTo( Object value ) {
541                assert state != ElementEntryState.NODE;
542                state = ElementEntryState.PROPERTY;
543                parent.addProperty(name, value);
544            }
545    
546            protected final Path path() {
547                return pathToThisNode;
548            }
549    
550            protected void submit() {
551                if (state == ElementEntryState.PROPERTY) return;
552    
553                if (state == ElementEntryState.NODE && properties.size() == 0) return;
554                Property[] propertiesToAdd = new Property[properties.size()];
555                int i = 0;
556                for (Name name : properties.keySet()) {
557                    propertiesToAdd[i++] = createProperty(name, properties.get(name));
558                }
559    
560                if (state == ElementEntryState.TBD) {
561                    // Merge the add and the create
562                    destination.create(pathToThisNode, Arrays.asList(propertiesToAdd));
563                } else {
564                    destination.setProperties(pathToThisNode, propertiesToAdd);
565                }
566            }
567        }
568    }