001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.graph.xml; 025 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.Collection; 029 import java.util.Collections; 030 import java.util.HashMap; 031 import java.util.LinkedList; 032 import java.util.List; 033 import java.util.Map; 034 import javax.xml.parsers.SAXParser; 035 import net.jcip.annotations.NotThreadSafe; 036 import org.jboss.dna.common.text.TextDecoder; 037 import org.jboss.dna.common.text.XmlNameEncoder; 038 import org.jboss.dna.common.util.CheckArg; 039 import org.jboss.dna.graph.ExecutionContext; 040 import org.jboss.dna.graph.io.Destination; 041 import org.jboss.dna.graph.property.Name; 042 import org.jboss.dna.graph.property.NameFactory; 043 import org.jboss.dna.graph.property.NamespaceRegistry; 044 import org.jboss.dna.graph.property.Path; 045 import org.jboss.dna.graph.property.PathFactory; 046 import org.jboss.dna.graph.property.Property; 047 import org.jboss.dna.graph.property.PropertyFactory; 048 import org.jboss.dna.graph.property.basic.LocalNamespaceRegistry; 049 import org.xml.sax.Attributes; 050 import org.xml.sax.ext.DefaultHandler2; 051 import com.google.common.collect.LinkedHashMultimap; 052 import com.google.common.collect.Multimap; 053 054 /** 055 * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the 056 * supplied graph. This implementation ignores DTD entities, XML contents, and other XML processing instructions. If other 057 * behavior is required, the appropriate methods can be overridden. (Which is why this class extends <code>DefaultHandler2</code>, 058 * which has support for processing all the different parts of XML. 059 * <p> 060 * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler) 061 * parse(..,DefaultHandler)} methods. 062 * </p> 063 * 064 * @author Randall Hauch 065 */ 066 @NotThreadSafe 067 public class XmlHandler extends DefaultHandler2 { 068 069 /** 070 * The choices for how attributes that have no namespace prefix should be assigned a namespace. 071 * 072 * @author Randall Hauch 073 */ 074 public enum AttributeScoping { 075 /** The attribute's namespace is the default namespace */ 076 USE_DEFAULT_NAMESPACE, 077 /** The attribute's namespace is the same namespace as the containing element */ 078 INHERIT_ELEMENT_NAMESPACE; 079 } 080 081 private final ExecutionContext context; 082 083 /** 084 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16 085 * characters. 086 */ 087 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder(); 088 089 /** 090 * The default {@link AttributeScoping}. 091 */ 092 public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE; 093 094 /** 095 * The destination where the content should be sent. 096 */ 097 protected final Destination destination; 098 099 /** 100 * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name". 101 */ 102 protected final Name nameAttribute; 103 104 /** 105 * The name of the property that is to be set with the type of the XML element. For example, "jcr:name". 106 */ 107 protected final Name typeAttribute; 108 109 /** 110 * The value of the node type property, if the node's name is set with the {@link #nameAttribute}. 111 */ 112 protected final Name typeAttributeValue; 113 114 /** 115 * The cached reference to the graph's path factory. 116 */ 117 protected final PathFactory pathFactory; 118 119 /** 120 * The cached reference to the graph's name factory. 121 */ 122 protected final NameFactory nameFactory; 123 124 /** 125 * The cached reference to the graph's property factory. 126 */ 127 protected final PropertyFactory propertyFactory; 128 129 /** 130 * The cached reference to the graph's namespace registry. 131 */ 132 protected final NamespaceRegistry namespaceRegistry; 133 134 /** 135 * The TextDecoder that is used to decode the names. 136 */ 137 protected final TextDecoder decoder; 138 139 /** 140 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in 141 * sync with the namespaces in the XML document. 142 */ 143 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>(); 144 145 private final AttributeScoping attributeScoping; 146 147 /** 148 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never 149 * is shorter than that initial path. 150 */ 151 protected Path currentPath; 152 153 /** 154 * Flag the records whether the first element should be skipped. 155 */ 156 protected boolean skipFirstElement; 157 158 /** 159 * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node. 160 */ 161 protected final List<Property> properties = new ArrayList<Property>(); 162 163 /** 164 * A working array that contains a single value object that is used to create Property objects (without having to create an 165 * array of values for each property). 166 */ 167 protected final Object[] propertyValues = new Object[1]; 168 169 /** 170 * Character buffer to aggregate nested character data 171 * 172 * @see ElementEntry 173 */ 174 private StringBuilder characterDataBuffer = new StringBuilder(); 175 176 /** 177 * Stack of pending {@link ElementEntry element entries} from the root of the imported content to the current node. 178 * 179 * @see ElementEntry 180 */ 181 private final LinkedList<ElementEntry> elementStack = new LinkedList<ElementEntry>(); 182 183 /** 184 * Create a handler that creates content in the supplied graph 185 * 186 * @param destination the destination where the content should be sent.graph in which the content should be placed 187 * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be 188 * converted to the top-level node of the content 189 * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed 190 * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to 191 * using those values to create names; or null if the default encoder should be used 192 * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is 193 * "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name 194 * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no 195 * such property 196 * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>, 197 * or null if the value should be set to the type of the XML element 198 * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the 199 * {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used 200 * @throws IllegalArgumentException if the destination reference is null 201 */ 202 public XmlHandler( Destination destination, 203 boolean skipRootElement, 204 Path parent, 205 TextDecoder textDecoder, 206 Name nameAttribute, 207 Name typeAttribute, 208 Name typeAttributeValue, 209 AttributeScoping scoping ) { 210 CheckArg.isNotNull(destination, "destination"); 211 assert destination != null; 212 this.destination = destination; 213 this.nameAttribute = nameAttribute; 214 this.typeAttribute = typeAttribute; 215 this.typeAttributeValue = typeAttributeValue; 216 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER; 217 this.skipFirstElement = skipRootElement; 218 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING; 219 220 // Use the execution context ... 221 this.context = destination.getExecutionContext(); 222 assert this.context != null; 223 224 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ... 225 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry()); 226 final ExecutionContext localContext = this.context.with(namespaceRegistry); 227 228 // Set up references to frequently-used objects in the context ... 229 this.nameFactory = localContext.getValueFactories().getNameFactory(); 230 this.pathFactory = localContext.getValueFactories().getPathFactory(); 231 this.propertyFactory = localContext.getPropertyFactory(); 232 this.namespaceRegistry = localContext.getNamespaceRegistry(); 233 assert this.nameFactory != null; 234 assert this.pathFactory != null; 235 assert this.propertyFactory != null; 236 assert this.namespaceRegistry != null; 237 238 // Set up the initial path ... 239 this.currentPath = parent != null ? parent : this.pathFactory.createRootPath(); 240 assert this.currentPath != null; 241 } 242 243 /** 244 * {@inheritDoc} 245 * <p> 246 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix 247 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create 248 * {@link Name} objects, no attempt is made to match the XML namespace prefixes. 249 * </p> 250 * 251 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String) 252 */ 253 @Override 254 public void startPrefixMapping( String prefix, 255 String uri ) { 256 assert uri != null; 257 // Add the prefix to the stack ... 258 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 259 if (prefixStack == null) { 260 prefixStack = new LinkedList<String>(); 261 this.prefixStackByUri.put(uri, prefixStack); 262 } 263 prefixStack.addFirst(prefix); 264 265 // If the namespace is already registered, then we'll have to register it in the context's registry, too. 266 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) { 267 // The namespace is not already registered (locally or in the context's registry), so we have to 268 // register it with the context's registry (which the local register then inherits). 269 NamespaceRegistry contextRegistry = context.getNamespaceRegistry(); 270 if (contextRegistry.getNamespaceForPrefix(prefix) != null) { 271 // The prefix is already bound, so register and generate a unique prefix 272 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true); 273 // Now register locally with the supplied prefix ... 274 namespaceRegistry.register(prefix, uri); 275 } else { 276 context.getNamespaceRegistry().register(prefix, uri); 277 } 278 } else { 279 // It is already registered, but re-register it locally using the supplied prefix ... 280 namespaceRegistry.register(prefix, uri); 281 } 282 } 283 284 /** 285 * {@inheritDoc} 286 * 287 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String) 288 */ 289 @Override 290 public void endPrefixMapping( String prefix ) { 291 assert prefix != null; 292 // Get the current URI for this prefix ... 293 String uri = namespaceRegistry.getNamespaceForPrefix(prefix); 294 assert uri != null; 295 296 // Get the previous prefix from the stack ... 297 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 298 assert prefixStack != null; 299 assert !prefixStack.isEmpty(); 300 String existingPrefix = prefixStack.removeFirst(); 301 assert prefix.equals(existingPrefix); 302 303 // If there are no previous prefixes, then remove the mapping ... 304 if (prefixStack.isEmpty()) { 305 namespaceRegistry.unregister(uri); 306 prefixStackByUri.remove(uri); 307 } else { 308 String previous = prefixStack.getFirst(); 309 namespaceRegistry.register(previous, uri); 310 } 311 } 312 313 /** 314 * {@inheritDoc} 315 * 316 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, 317 * org.xml.sax.Attributes) 318 */ 319 @Override 320 public void startElement( String uri, 321 String localName, 322 String name, 323 Attributes attributes ) { 324 // Should this (root) element be skipped? 325 if (skipFirstElement) { 326 skipFirstElement = false; 327 return; 328 } 329 assert localName != null; 330 Name nodeName = null; 331 332 ElementEntry element; 333 if (!elementStack.isEmpty()) { 334 // Add the parent 335 elementStack.peek().addAsNode(); 336 element = new ElementEntry(elementStack.peek(), currentPath, null); 337 } else { 338 element = new ElementEntry(null, currentPath, null); 339 } 340 elementStack.addFirst(element); 341 342 properties.clear(); 343 Object typePropertyValue = null; 344 // Convert each of the attributes to a property ... 345 for (int i = 0, len = attributes.getLength(); i != len; ++i) { 346 String attributeLocalName = attributes.getLocalName(i); 347 String attributeUri = attributes.getURI(i); 348 Name attributeName = null; 349 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) { 350 switch (this.attributeScoping) { 351 case INHERIT_ELEMENT_NAMESPACE: 352 attributeName = nameFactory.create(uri, attributeLocalName, decoder); 353 break; 354 case USE_DEFAULT_NAMESPACE: 355 attributeName = nameFactory.create(attributeLocalName, decoder); 356 break; 357 } 358 } else { 359 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder); 360 } 361 assert attributeName != null; 362 // Check to see if this is an attribute that represents the node name (which may be null) ... 363 if (nodeName == null && attributeName.equals(nameAttribute)) { 364 nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder 365 element.setName(nodeName); 366 continue; 367 } 368 if (typePropertyValue == null && attributeName.equals(typeAttribute)) { 369 typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder 370 continue; 371 } 372 // Create a property for this attribute ... 373 element.addProperty(attributeName, attributes.getValue(i)); 374 } 375 // Create the node name if required ... 376 if (nodeName == null) { 377 // No attribute defines the node name ... 378 nodeName = nameFactory.create(uri, localName, decoder); 379 element.setName(nodeName); 380 } else { 381 if (typePropertyValue == null) typePropertyValue = nameFactory.create(uri, localName, decoder); 382 } 383 if (typeAttribute != null) { 384 // A attribute defines the node name. Set the type property, if required 385 if (typePropertyValue == null) typePropertyValue = typeAttributeValue; 386 if (typePropertyValue != null) { 387 element.addProperty(typeAttribute, typePropertyValue); 388 } 389 } 390 391 // Update the current path ... 392 currentPath = element.path(); 393 } 394 395 /** 396 * {@inheritDoc} 397 * 398 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 399 */ 400 @Override 401 public void endElement( String uri, 402 String localName, 403 String name ) { 404 405 String s = characterDataBuffer.toString().trim(); 406 if (s.length() > 0) { 407 elementStack.removeFirst().addAsPropertySetTo(s); 408 } else if (!elementStack.isEmpty()) { 409 elementStack.removeFirst().submit(); 410 } 411 characterDataBuffer = new StringBuilder(); 412 413 // Nothing to do but to change the current path to be the parent ... 414 currentPath = currentPath.getParent(); 415 } 416 417 /** 418 * {@inheritDoc} 419 * 420 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int) 421 */ 422 @Override 423 public void characters( char[] ch, 424 int start, 425 int length ) { 426 // Have to add this to a buffer as one logical set of character data can cause this method to fire multiple times 427 characterDataBuffer.append(ch, start, length); 428 } 429 430 /** 431 * {@inheritDoc} 432 * 433 * @see org.xml.sax.helpers.DefaultHandler#endDocument() 434 */ 435 @Override 436 public void endDocument() { 437 // Submit any outstanding requests (if there are any) ... 438 destination.submit(); 439 } 440 441 /** 442 * Create a property with the given name and value, obtained from an attribute name and value in the XML content. 443 * <p> 444 * By default, this method creates a property by directly using the value as the sole value of the property. 445 * </p> 446 * 447 * @param propertyName the name of the property; never null 448 * @param value the attribute value 449 * @return the property; may not be null 450 */ 451 protected Property createProperty( Name propertyName, 452 Object value ) { 453 propertyValues[0] = value; 454 return propertyFactory.create(propertyName, propertyValues); 455 } 456 457 /** 458 * Create a property with the given name and values, obtained from an attribute name and value in the XML content. 459 * <p> 460 * By default, this method creates a property by directly using the values as the values of the property. 461 * </p> 462 * 463 * @param propertyName the name of the property; never null 464 * @param values the attribute values 465 * @return the property; may not be null 466 */ 467 protected Property createProperty( Name propertyName, 468 Collection<Object> values ) { 469 return propertyFactory.create(propertyName, values); 470 } 471 472 /** 473 * Possible states for an {@link ElementEntry} instance. All element entries start in state {@code TBD} and then transition to 474 * one of the terminating states, {@code NODE} or {@code PROPERTY} when {@link ElementEntry#addAsNode()} or 475 * {@link ElementEntry#addAsPropertySetTo(Object)} is invoked. 476 */ 477 protected enum ElementEntryState { 478 NODE, 479 PROPERTY, 480 TBD 481 } 482 483 /** 484 * Element entries hold references to the data of "pending" elements. "Pending" elements are elements which have been 485 * encountered through a {@link XmlHandler#startElement(String, String, String, Attributes)} event but have not yet been fully 486 * committed to the {@link XmlHandler#destination}. 487 * <p> 488 * As the current import semantics allow elements with nested character data to be imported as properties, it is not always 489 * possible to determine whether the element represents a node or a property from within the {@code startElement} method. 490 * Therefore, {@code ElementEntries} are initially created in an {@link ElementEntryState#TBD unknown state} and submitted to 491 * the {@code destination} when it can be positively determined that the entry represents a property (if nested character data 492 * is encountered) or a node (if a child node is detected or the {@link XmlHandler#endElement(String, String, String)} method 493 * is invoked prior to encountering nested character data). 494 * </p> 495 * <p> 496 * As DNA does not currently support a way to add a value to an existing property through the Graph API, {@code 497 * ElementEntries} also contain a {@link Multimap} of property names to values. The node's properties are aggregated and only 498 * submitted to the {@code destination} when the {@link XmlHandler#endElement(String, String, String)} event fires. 499 * </p> 500 */ 501 private class ElementEntry { 502 503 private ElementEntry parent; 504 // Stored separately since the root node has no parent ElementEntry but does have a path 505 private Path pathToParent; 506 private Path pathToThisNode; 507 private Name name; 508 private Multimap<Name, Object> properties; 509 private ElementEntryState state; 510 511 protected ElementEntry( ElementEntry parent, 512 Path pathToParent, 513 Name name ) { 514 this.parent = parent; 515 this.pathToParent = pathToParent; 516 this.name = name; 517 this.state = ElementEntryState.TBD; 518 properties = new LinkedHashMultimap<Name, Object>(); 519 } 520 521 protected void setName( Name name ) { 522 this.name = name; 523 pathToThisNode = pathFactory.create(pathToParent, name); 524 } 525 526 protected void addProperty( Name propertyName, 527 Object propertyValue ) { 528 assert state != ElementEntryState.PROPERTY; 529 properties.put(propertyName, propertyValue); 530 } 531 532 protected void addAsNode() { 533 assert state != ElementEntryState.PROPERTY; 534 if (state == ElementEntryState.NODE) return; 535 536 state = ElementEntryState.NODE; 537 destination.create(pathFactory.create(pathToParent, name), Collections.<Property>emptyList()); 538 } 539 540 protected void addAsPropertySetTo( Object value ) { 541 assert state != ElementEntryState.NODE; 542 state = ElementEntryState.PROPERTY; 543 parent.addProperty(name, value); 544 } 545 546 protected final Path path() { 547 return pathToThisNode; 548 } 549 550 protected void submit() { 551 if (state == ElementEntryState.PROPERTY) return; 552 553 if (state == ElementEntryState.NODE && properties.size() == 0) return; 554 Property[] propertiesToAdd = new Property[properties.size()]; 555 int i = 0; 556 for (Name name : properties.keySet()) { 557 propertiesToAdd[i++] = createProperty(name, properties.get(name)); 558 } 559 560 if (state == ElementEntryState.TBD) { 561 // Merge the add and the create 562 destination.create(pathToThisNode, Arrays.asList(propertiesToAdd)); 563 } else { 564 destination.setProperties(pathToThisNode, propertiesToAdd); 565 } 566 } 567 } 568 }