/*
 * JBoss, Home of Professional Open Source.
 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
 * as indicated by the @author tags. See the copyright.txt file in the
 * distribution for a full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.jboss.dna.graph.xml;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.SAXParser;
import net.jcip.annotations.NotThreadSafe;
import org.jboss.dna.common.text.TextDecoder;
import org.jboss.dna.common.text.XmlNameEncoder;
import org.jboss.dna.common.util.CheckArg;
import org.jboss.dna.graph.BasicExecutionContext;
import org.jboss.dna.graph.ExecutionContext;
import org.jboss.dna.graph.properties.Name;
import org.jboss.dna.graph.properties.NameFactory;
import org.jboss.dna.graph.properties.NamespaceRegistry;
import org.jboss.dna.graph.properties.Path;
import org.jboss.dna.graph.properties.PathFactory;
import org.jboss.dna.graph.properties.Property;
import org.jboss.dna.graph.properties.PropertyFactory;
import org.jboss.dna.graph.properties.basic.LocalNamespaceRegistry;
import org.xml.sax.Attributes;
import org.xml.sax.ext.DefaultHandler2;

/**
 * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the
 * supplied {@link Destination}. Each XML element becomes a node and each XML attribute becomes a property of that node. Only
 * the prefix-mapping, element and end-of-document callbacks are overridden, so DTD declarations, comments, character content
 * and processing instructions are ignored. If other behavior is required, the appropriate methods can be overridden (which is
 * why this class extends <code>DefaultHandler2</code>, which has support for processing all the different parts of XML).
 * <p>
 * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler)
 * parse(..,DefaultHandler)} methods.
 * </p>
 *
 * @author Randall Hauch
 */
@NotThreadSafe
public class XmlHandler extends DefaultHandler2 {

    /**
     * The choices for how attributes that have no namespace prefix should be assigned a namespace.
     *
     * @author Randall Hauch
     */
    public enum AttributeScoping {
        /** The attribute's namespace is the default namespace */
        USE_DEFAULT_NAMESPACE,
        /** The attribute's namespace is the same namespace as the containing element */
        INHERIT_ELEMENT_NAMESPACE;
    }

    /**
     * The execution context obtained from the {@link #destination}. Its namespace registry is the one that the handler's
     * local registry delegates to.
     */
    private final ExecutionContext context;

    /**
     * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
     * characters.
     */
    public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();

    /**
     * The default {@link AttributeScoping}.
     */
    public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;

    /**
     * The destination where the content should be sent.
     */
    protected final Destination destination;

    /**
     * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
     */
    protected final Name nameAttribute;

    /**
     * The name of the property that is to be set with the type of the XML element. For example, "jcr:primaryType".
     */
    protected final Name typeAttribute;

    /**
     * The value of the type property that is used when neither an attribute nor the element name supplies one.
     */
    protected final Name typeAttributeValue;

    /**
     * The cached reference to the path factory of the handler's local execution context.
     */
    protected final PathFactory pathFactory;

    /**
     * The cached reference to the name factory of the handler's local execution context.
     */
    protected final NameFactory nameFactory;

    /**
     * The cached reference to the property factory of the handler's local execution context.
     */
    protected final PropertyFactory propertyFactory;

    /**
     * The cached reference to the local namespace registry, which wraps the registry of the destination's context.
     */
    protected final NamespaceRegistry namespaceRegistry;

    /**
     * The TextDecoder that is used to decode the names.
     */
    protected final TextDecoder decoder;

    /**
     * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
     * sync with the namespaces in the XML document.
     */
    private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();

    /**
     * How the namespace of an attribute without a namespace prefix is chosen; never null.
     */
    private final AttributeScoping attributeScoping;

    /**
     * The path that {@link #currentPath} started with. It is remembered so that {@link #endElement(String, String, String)}
     * never moves above it.
     */
    private final Path initialPath;

    /**
     * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
     * is shorter than that initial path.
     */
    protected Path currentPath;

    /**
     * Flag that records whether the first element should be skipped.
     */
    protected boolean skipFirstElement;

    /**
     * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node.
     */
    protected final List<Property> properties = new ArrayList<Property>();

    /**
     * A working array that contains a single value object that is used to create Property objects (without having to create an
     * array of values for each property).
     */
    protected final Object[] propertyValues = new Object[1];

    /**
     * Create a handler that creates content in the supplied destination.
     *
     * @param destination the destination where the content should be sent; may not be null
     * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be
     *        converted to the top-level node of the content
     * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed
     * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to
     *        using those values to create names; or null if the default encoder should be used
     * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
     *        "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
     * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
     *        such property
     * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>,
     *        or null if the value should be set to the type of the XML element
     * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
     *        {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used
     * @throws IllegalArgumentException if the destination reference is null
     */
    public XmlHandler( Destination destination,
                       boolean skipRootElement,
                       Path parent,
                       TextDecoder textDecoder,
                       Name nameAttribute,
                       Name typeAttribute,
                       Name typeAttributeValue,
                       AttributeScoping scoping ) {
        CheckArg.isNotNull(destination, "destination");
        this.destination = destination;
        this.nameAttribute = nameAttribute;
        this.typeAttribute = typeAttribute;
        this.typeAttributeValue = typeAttributeValue;
        this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
        this.skipFirstElement = skipRootElement;
        this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;

        // Use the execution context ...
        this.context = destination.getExecutionContext();
        assert this.context != null;

        // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
        NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
        final ExecutionContext localContext = new BasicExecutionContext(this.context, namespaceRegistry);

        // Set up references to frequently-used objects in the context ...
        this.nameFactory = localContext.getValueFactories().getNameFactory();
        this.pathFactory = localContext.getValueFactories().getPathFactory();
        this.propertyFactory = localContext.getPropertyFactory();
        this.namespaceRegistry = localContext.getNamespaceRegistry();
        assert this.nameFactory != null;
        assert this.pathFactory != null;
        assert this.propertyFactory != null;
        assert this.namespaceRegistry != null;

        // Set up the initial path, and remember it so that the current path is never popped above it ...
        this.currentPath = parent != null ? parent : this.pathFactory.createRootPath();
        assert this.currentPath != null;
        this.initialPath = this.currentPath;
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
     * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
     * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
     * </p>
     *
     * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    @Override
    public void startPrefixMapping( String prefix,
                                    String uri ) {
        assert uri != null;
        // Add the prefix to the stack ...
        LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
        if (prefixStack == null) {
            prefixStack = new LinkedList<String>();
            this.prefixStackByUri.put(uri, prefixStack);
        }
        prefixStack.addFirst(prefix);

        // If the namespace is not already registered, then we'll have to register it in the context's registry, too.
        if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
            // The namespace is not already registered (locally or in the context's registry), so we have to
            // register it with the context's registry (which the local register then inherits).
            NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
            if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
                // The prefix is already bound, so register and generate a unique prefix
                context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
                // Now register locally with the supplied prefix ...
                namespaceRegistry.register(prefix, uri);
            } else {
                context.getNamespaceRegistry().register(prefix, uri);
            }
        } else {
            // It is already registered, but re-register it locally using the supplied prefix ...
            namespaceRegistry.register(prefix, uri);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method pops the prefix from the stack kept for its namespace URI. If an earlier prefix for the same URI remains on the
     * stack, that prefix is re-registered locally; otherwise the URI is unregistered from the local registry.
     * </p>
     *
     * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
     */
    @Override
    public void endPrefixMapping( String prefix ) {
        assert prefix != null;
        // Get the current URI for this prefix ...
        String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
        assert uri != null;

        // Get the previous prefix from the stack ...
        LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
        assert prefixStack != null;
        assert !prefixStack.isEmpty();
        String existingPrefix = prefixStack.removeFirst();
        assert prefix.equals(existingPrefix);

        // If there are no previous prefixes, then remove the mapping ...
        if (prefixStack.isEmpty()) {
            namespaceRegistry.unregister(uri);
            prefixStackByUri.remove(uri);
        } else {
            String previous = prefixStack.getFirst();
            namespaceRegistry.register(previous, uri);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Unless this is the first element and it is to be skipped, this method converts the element's attributes into properties,
     * determines the node name (from the {@link #nameAttribute} if present, otherwise from the element name), adds the type
     * property if a {@link #typeAttribute} was configured, extends the {@link #currentPath current path} with the node name, and
     * asks the {@link #destination} to create the node.
     * </p>
     *
     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
     *      org.xml.sax.Attributes)
     */
    @Override
    public void startElement( String uri,
                              String localName,
                              String name,
                              Attributes attributes ) {
        // Should this (root) element be skipped?
        if (skipFirstElement) {
            // No node is created and the current path is left untouched; endElement(...) accounts for this.
            skipFirstElement = false;
            return;
        }
        assert localName != null;
        Name nodeName = null;

        properties.clear();
        Object typePropertyValue = null;
        // Convert each of the attributes to a property ...
        for (int i = 0, len = attributes.getLength(); i != len; ++i) {
            String attributeLocalName = attributes.getLocalName(i);
            String attributeUri = attributes.getURI(i);
            Name attributeName = null;
            if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
                // The attribute has no namespace and no prefix, so the scoping rule chooses the namespace ...
                switch (this.attributeScoping) {
                    case INHERIT_ELEMENT_NAMESPACE:
                        attributeName = nameFactory.create(uri, attributeLocalName, decoder);
                        break;
                    case USE_DEFAULT_NAMESPACE:
                        attributeName = nameFactory.create(attributeLocalName, decoder);
                        break;
                }
            } else {
                attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
            }
            assert attributeName != null;
            // Check to see if this is an attribute that represents the node name (which may be null) ...
            if (nodeName == null && attributeName.equals(nameAttribute)) {
                nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder
                continue;
            }
            // Check to see if this is an attribute that represents the node type (which may be null) ...
            if (typePropertyValue == null && attributeName.equals(typeAttribute)) {
                typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder
                continue;
            }
            // Create a property for this attribute ...
            Property property = createProperty(attributeName, attributes.getValue(i));
            properties.add(property);
        }
        // Create the node name if required ...
        if (nodeName == null) {
            // No attribute defines the node name ...
            nodeName = nameFactory.create(uri, localName, decoder);
        } else {
            // An attribute defines the node name, so the element name is used as the type.
            // NOTE(review): this replaces any value read from an explicit type attribute above - confirm that is intended.
            typePropertyValue = nameFactory.create(uri, localName, decoder);
        }
        if (typeAttribute != null) {
            // Set the type property, if required, falling back to the configured value ...
            if (typePropertyValue == null) typePropertyValue = typeAttributeValue;
            if (typePropertyValue != null) {
                propertyValues[0] = typePropertyValue;
                Property property = propertyFactory.create(typeAttribute, propertyValues);
                properties.add(property);
            }
        }
        // Update the current path ...
        currentPath = pathFactory.create(currentPath, nodeName);
        // Create the node, and note that we don't care about same-name siblings (as the graph will correct them) ...
        destination.create(currentPath, properties);
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method changes the {@link #currentPath current path} to be its parent, unless the current path is already the
     * initial path. That happens when the element being closed is a skipped root element, for which no path segment was added.
     * </p>
     *
     * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void endElement( String uri,
                            String localName,
                            String name ) {
        // A skipped root element never extended the path, so there is nothing to pop when it ends. Without this check the
        // current path would become shorter than the initial path.
        // NOTE(review): relies on Path.equals(...) comparing by value - confirm against the Path implementations.
        if (initialPath.equals(currentPath)) return;
        // Nothing to do but to change the current path to be the parent ...
        currentPath = currentPath.getParent();
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method asks the {@link #destination} to submit its requests.
     * </p>
     *
     * @see org.xml.sax.helpers.DefaultHandler#endDocument()
     */
    @Override
    public void endDocument() {
        // Submit any outstanding requests (if there are any) ...
        destination.submit();
    }

    /**
     * Create a property with the given name and value, obtained from an attribute name and value in the XML content.
     * <p>
     * By default, this method creates a property by directly using the value as the sole value of the property. The shared
     * {@link #propertyValues} working array is used to pass the value to the property factory.
     * </p>
     *
     * @param propertyName the name of the property; never null
     * @param value the attribute value
     * @return the property; may not be null
     */
    protected Property createProperty( Name propertyName,
                                       Object value ) {
        propertyValues[0] = value;
        Property result = propertyFactory.create(propertyName, propertyValues);
        return result;
    }

    /**
     * Interface used internally as the destination for the requests. This is used to abstract whether the requests should be
     * submitted immediately or in a single batch.
     *
     * @author Randall Hauch
     */
    @NotThreadSafe
    public static interface Destination {

        /**
         * Obtain the execution context of the destination.
         *
         * @return the destination's execution context
         */
        public ExecutionContext getExecutionContext();

        /**
         * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
         *
         * @param path the absolute path of the node
         * @param properties the properties for the node; never null, but may be empty if there are no properties
         */
        public void create( Path path,
                            List<Property> properties );

        /**
         * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
         *
         * @param path the absolute path of the node
         * @param firstProperty the first property
         * @param additionalProperties the remaining properties for the node
         */
        public void create( Path path,
                            Property firstProperty,
                            Property... additionalProperties );

        /**
         * Signal to this destination that any enqueued create requests should be submitted. Usually this happens at the end of
         * the document parsing, but an implementer must allow for it to be called multiple times and anytime during parsing.
         */
        public void submit();
    }
}