001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.sequencer.xml; 025 026 import java.util.ArrayList; 027 import java.util.HashMap; 028 import java.util.LinkedList; 029 import java.util.List; 030 import java.util.Map; 031 import org.jboss.dna.common.collection.Problems; 032 import org.jboss.dna.common.text.TextDecoder; 033 import org.jboss.dna.common.text.XmlNameEncoder; 034 import org.jboss.dna.common.util.CheckArg; 035 import org.jboss.dna.common.util.StringUtil; 036 import org.jboss.dna.graph.ExecutionContext; 037 import org.jboss.dna.graph.JcrLexicon; 038 import org.jboss.dna.graph.property.Name; 039 import org.jboss.dna.graph.property.NameFactory; 040 import org.jboss.dna.graph.property.NamespaceRegistry; 041 import org.jboss.dna.graph.property.Path; 042 import org.jboss.dna.graph.property.PathFactory; 043 import org.jboss.dna.graph.property.PropertyFactory; 044 import org.jboss.dna.graph.property.ValueFormatException; 045 import org.jboss.dna.graph.property.basic.LocalNamespaceRegistry; 046 import org.jboss.dna.graph.sequencer.StreamSequencerContext; 047 import org.jboss.dna.graph.sequencer.SequencerOutput; 048 import org.xml.sax.Attributes; 049 import org.xml.sax.SAXParseException; 050 import org.xml.sax.ext.DefaultHandler2; 051 052 /** 053 * @author Randall Hauch 054 */ 055 public class XmlSequencerHandler extends DefaultHandler2 { 056 057 private final SequencerOutput output; 058 private final StreamSequencerContext context; 059 060 /** 061 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16 062 * characters. 063 */ 064 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder(); 065 066 /** 067 * The default {@link XmlSequencer.AttributeScoping}. 068 */ 069 public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE; 070 071 /** 072 * The name of the attribute that should be used for the node name. 073 */ 074 protected final Name nameAttribute; 075 076 /** 077 * The default primary type. 078 */ 079 protected final Name defaultPrimaryType; 080 081 /** 082 * The cached reference to the graph's path factory. 083 */ 084 protected final PathFactory pathFactory; 085 086 /** 087 * The cached reference to the graph's name factory. 088 */ 089 protected final NameFactory nameFactory; 090 091 /** 092 * The cached reference to the graph's property factory. 093 */ 094 protected final PropertyFactory propertyFactory; 095 096 /** 097 * The cached reference to the graph's namespace registry. 098 */ 099 protected final NamespaceRegistry namespaceRegistry; 100 101 /** 102 * The TextDecoder that is used to decode the names. 103 */ 104 protected final TextDecoder decoder; 105 106 /** 107 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in 108 * sync with the namespaces in the XML document. 109 */ 110 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>(); 111 112 private final XmlSequencer.AttributeScoping attributeScoping; 113 114 /** 115 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never 116 * is shorter than that initial path. 117 */ 118 protected Path currentPath; 119 120 // Recursive map used to track the number of occurrences of names for elements under a particular path 121 private Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>(); 122 123 // The stack of recursive maps being processed, with the head entry being the map for the current path 124 private final LinkedList<Map<Name, List<IndexedName>>> nameToIndexedNamesMapStack = new LinkedList<Map<Name, List<IndexedName>>>(); 125 126 private String currentEntityName; 127 private StringBuilder cDataContent; 128 private StringBuilder contentBuilder; 129 private final Problems problems; 130 private final Map<String, String> entityValues = new HashMap<String, String>(); 131 132 /** 133 * @param output 134 * @param context 135 * @param nameAttribute 136 * @param defaultPrimaryType 137 * @param textDecoder 138 * @param scoping 139 */ 140 XmlSequencerHandler( SequencerOutput output, 141 StreamSequencerContext context, 142 Name nameAttribute, 143 Name defaultPrimaryType, 144 TextDecoder textDecoder, 145 XmlSequencer.AttributeScoping scoping ) { 146 CheckArg.isNotNull(output, "output"); 147 CheckArg.isNotNull(context, "context"); 148 149 // Use the execution context ... 150 this.output = output; 151 this.context = context; 152 this.problems = context.getProblems(); 153 assert this.problems != null; 154 155 this.nameAttribute = nameAttribute; 156 this.defaultPrimaryType = defaultPrimaryType; 157 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER; 158 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING; 159 160 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ... 161 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry()); 162 final ExecutionContext localContext = this.context.with(namespaceRegistry); 163 164 // Set up references to frequently-used objects in the context ... 165 this.nameFactory = localContext.getValueFactories().getNameFactory(); 166 this.pathFactory = localContext.getValueFactories().getPathFactory(); 167 this.propertyFactory = localContext.getPropertyFactory(); 168 this.namespaceRegistry = localContext.getNamespaceRegistry(); 169 assert this.nameFactory != null; 170 assert this.pathFactory != null; 171 assert this.propertyFactory != null; 172 assert this.namespaceRegistry != null; 173 174 // Set up the initial path ... 175 this.currentPath = this.pathFactory.createRelativePath(); 176 assert this.currentPath != null; 177 } 178 179 private void startNode( Name name ) { 180 // Check if content still needs to be output 181 if (contentBuilder != null) endContent(); 182 // Add name to list of indexed names for this element to ensure we use the correct index (which is the size of the 183 // list) 184 List<IndexedName> indexedNames = nameToIndexedNamesMap.get(name); 185 if (indexedNames == null) { 186 indexedNames = new ArrayList<IndexedName>(); 187 nameToIndexedNamesMap.put(name, indexedNames); 188 } 189 IndexedName indexedName = new IndexedName(); 190 indexedNames.add(indexedName); 191 // Add element name and the appropriate index to the path. 192 // Per the JCR spec, the index must be relative to same-name sibling nodes 193 currentPath = pathFactory.create(currentPath, name, indexedNames.size()).getNormalizedPath(); 194 // currentPath = currentPath.getNormalizedPath(); 195 // Add the indexed name map to the stack and set the current map to the new element's map 196 nameToIndexedNamesMapStack.addFirst(nameToIndexedNamesMap); 197 nameToIndexedNamesMap = indexedName.nameToIndexedNamesMap; 198 } 199 200 private void endNode() { 201 // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory 202 currentPath = currentPath.getParent(); 203 currentPath = currentPath.getNormalizedPath(); 204 nameToIndexedNamesMap.clear(); 205 nameToIndexedNamesMap = nameToIndexedNamesMapStack.removeFirst(); 206 } 207 208 /** 209 * See if there is any element content that needs to be completed. 210 */ 211 protected void endContent() { 212 // Process the content of the element ... 213 String content = StringUtil.normalize(contentBuilder.toString()); 214 // Null-out builder to setup for subsequent content. 215 // Must be done before call to startElement below to prevent infinite loop. 216 contentBuilder = null; 217 // Skip if nothing in content but whitespace 218 if (content.length() > 0) { 219 // Create separate node for each content entry since entries can be interspersed amongst child elements 220 startNode(DnaXmlLexicon.ELEMENT_CONTENT); 221 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.ELEMENT_CONTENT); 222 output.setProperty(currentPath, DnaXmlLexicon.ELEMENT_CONTENT, content); 223 endNode(); 224 } 225 } 226 227 /** 228 * <p> 229 * {@inheritDoc} 230 * </p> 231 * 232 * @see org.xml.sax.helpers.DefaultHandler#startDocument() 233 */ 234 @Override 235 public void startDocument() { 236 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.DOCUMENT); 237 } 238 239 /** 240 * <p> 241 * {@inheritDoc} 242 * </p> 243 * 244 * @see org.xml.sax.ext.DefaultHandler2#startDTD(java.lang.String, java.lang.String, java.lang.String) 245 */ 246 @Override 247 public void startDTD( String name, 248 String publicId, 249 String systemId ) { 250 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 251 output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId); 252 output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId); 253 } 254 255 /** 256 * <p> 257 * {@inheritDoc} 258 * </p> 259 * 260 * @see org.xml.sax.ext.DefaultHandler2#externalEntityDecl(java.lang.String, java.lang.String, java.lang.String) 261 */ 262 @Override 263 public void externalEntityDecl( String name, 264 String publicId, 265 String systemId ) { 266 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 267 startNode(DnaDtdLexicon.ENTITY); 268 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY); 269 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 270 if (publicId != null) output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId); 271 if (systemId != null) output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId); 272 endNode(); 273 } 274 275 /** 276 * {@inheritDoc} 277 * 278 * @see org.xml.sax.ext.DefaultHandler2#internalEntityDecl(java.lang.String, java.lang.String) 279 */ 280 @Override 281 public void internalEntityDecl( String name, 282 String value ) { 283 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 284 startNode(DnaDtdLexicon.ENTITY); 285 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY); 286 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 287 output.setProperty(currentPath, DnaDtdLexicon.VALUE, value); 288 // Record the name/value pair ... 289 entityValues.put(name, value); 290 endNode(); 291 } 292 293 /** 294 * <p> 295 * {@inheritDoc} 296 * </p> 297 * 298 * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String, java.lang.String) 299 */ 300 @Override 301 public void processingInstruction( String target, 302 String data ) { 303 // Output separate nodes for each instruction since multiple are allowed 304 startNode(DnaXmlLexicon.PROCESSING_INSTRUCTION); 305 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.PROCESSING_INSTRUCTION); 306 output.setProperty(currentPath, DnaXmlLexicon.TARGET, target.trim()); 307 if (data != null) { 308 output.setProperty(currentPath, DnaXmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim()); 309 } 310 endNode(); 311 } 312 313 /** 314 * {@inheritDoc} 315 * <p> 316 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix 317 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create 318 * {@link Name} objects, no attempt is made to match the XML namespace prefixes. 319 * </p> 320 * 321 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String) 322 */ 323 @Override 324 public void startPrefixMapping( String prefix, 325 String uri ) { 326 assert uri != null; 327 // Add the prefix to the stack ... 328 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 329 if (prefixStack == null) { 330 prefixStack = new LinkedList<String>(); 331 this.prefixStackByUri.put(uri, prefixStack); 332 } 333 prefixStack.addFirst(prefix); 334 335 // If the namespace is already registered, then we'll have to register it in the context's registry, too. 336 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) { 337 // The namespace is not already registered (locally or in the context's registry), so we have to 338 // register it with the context's registry (which the local register then inherits). 339 NamespaceRegistry contextRegistry = context.getNamespaceRegistry(); 340 if (contextRegistry.getNamespaceForPrefix(prefix) != null) { 341 // The prefix is already bound, so register and generate a unique prefix 342 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true); 343 // Now register locally with the supplied prefix ... 344 namespaceRegistry.register(prefix, uri); 345 } else { 346 context.getNamespaceRegistry().register(prefix, uri); 347 } 348 } else { 349 // It is already registered, but re-register it locally using the supplied prefix ... 350 namespaceRegistry.register(prefix, uri); 351 } 352 } 353 354 /** 355 * {@inheritDoc} 356 * 357 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String) 358 */ 359 @Override 360 public void endPrefixMapping( String prefix ) { 361 assert prefix != null; 362 // Get the current URI for this prefix ... 363 String uri = namespaceRegistry.getNamespaceForPrefix(prefix); 364 assert uri != null; 365 366 // Get the previous prefix from the stack ... 367 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 368 assert prefixStack != null; 369 assert !prefixStack.isEmpty(); 370 String existingPrefix = prefixStack.removeFirst(); 371 assert prefix.equals(existingPrefix); 372 373 // If there are no previous prefixes, then remove the mapping ... 374 if (prefixStack.isEmpty()) { 375 namespaceRegistry.unregister(uri); 376 prefixStackByUri.remove(uri); 377 } else { 378 String previous = prefixStack.getFirst(); 379 namespaceRegistry.register(previous, uri); 380 } 381 } 382 383 /** 384 * <p> 385 * {@inheritDoc} 386 * </p> 387 * 388 * @see org.xml.sax.ext.DefaultHandler2#startEntity(java.lang.String) 389 */ 390 @Override 391 public void startEntity( String name ) { 392 // Record that we've started an entity by capturing the name of the entity ... 393 currentEntityName = name; 394 } 395 396 /** 397 * <p> 398 * {@inheritDoc} 399 * </p> 400 * 401 * @see org.xml.sax.ext.DefaultHandler2#endEntity(java.lang.String) 402 */ 403 @Override 404 public void endEntity( String name ) { 405 // currentEntityName is nulled in 'characters(...)', not here. 406 // See DNA-231 for an issue related to this 407 } 408 409 /** 410 * <p> 411 * {@inheritDoc} 412 * </p> 413 * 414 * @see org.xml.sax.ext.DefaultHandler2#startCDATA() 415 */ 416 @Override 417 public void startCDATA() { 418 // CDATA sections can start in the middle of element content, so there may already be some 419 // element content already processed ... 420 if (contentBuilder != null) endContent(); 421 422 // Prepare builder for concatenating consecutive lines of CDATA 423 cDataContent = new StringBuilder(); 424 } 425 426 /** 427 * {@inheritDoc} 428 * 429 * @see org.xml.sax.ext.DefaultHandler2#endCDATA() 430 */ 431 @Override 432 public void endCDATA() { 433 // Output CDATA built in characters() method 434 startNode(DnaXmlLexicon.CDATA); 435 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType); 436 output.setProperty(currentPath, DnaXmlLexicon.CDATA_CONTENT, cDataContent.toString()); 437 endNode(); 438 // Null-out builder to free memory 439 cDataContent = null; 440 } 441 442 /** 443 * {@inheritDoc} 444 * 445 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int) 446 */ 447 @Override 448 public void characters( char[] ch, 449 int start, 450 int length ) { 451 String content = String.valueOf(ch, start, length); 452 if (cDataContent != null) { 453 // Processing the characters in the CDATA, so add to the builder 454 cDataContent.append(ch, start, length); 455 // Text within builder will be output at the end of CDATA 456 } else { 457 if (contentBuilder == null) { 458 // This is the first line of content, so we have to create the StringBuilder ... 459 contentBuilder = new StringBuilder(); 460 } 461 if (currentEntityName != null) { 462 // This is an entity reference, so rather than use the entity value characters (the content passed 463 // into this method), we want to keep the entity reference ... 464 contentBuilder.append('&').append(currentEntityName).append(';'); 465 466 // Normally, 'characters' is called with just the entity replacement characters, 467 // and is called between 'startEntity' and 'endEntity'. However, per DNA-231, some JVMs 468 // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the 469 // content passed to the 'characters' call not only includes the entity replacement characters 470 // followed by other content. Look for this condition ... 471 String entityValue = entityValues.get(currentEntityName); 472 if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) { 473 // Per DNA-231, there's extra content after the entity value. So replace the entity value in the 474 // content with the entity reference (not the replacement characters), and add the extra content ... 475 String extraContent = content.substring(entityValue.length()); 476 contentBuilder.append(extraContent); 477 } 478 // We're done reading the entity characters, so null it out 479 currentEntityName = null; 480 } else { 481 // Just append the content normally ... 482 contentBuilder.append(content); 483 } 484 // Text within builder will be output when another element or CDATA is encountered 485 } 486 } 487 488 /** 489 * {@inheritDoc} 490 * 491 * @see org.xml.sax.ext.DefaultHandler2#comment(char[], int, int) 492 */ 493 @Override 494 public void comment( char[] ch, 495 int start, 496 int length ) { 497 // Output separate nodes for each comment since multiple are allowed 498 startNode(DnaXmlLexicon.COMMENT); 499 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.COMMENT); 500 output.setProperty(currentPath, DnaXmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim()); 501 endNode(); 502 } 503 504 /** 505 * {@inheritDoc} 506 * 507 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, 508 * org.xml.sax.Attributes) 509 */ 510 @Override 511 public void startElement( String uri, 512 String localName, 513 String name, 514 Attributes attributes ) { 515 assert localName != null; 516 517 // Create the node with the name built from the element's name ... 518 Name nodeName = null; 519 if (nameAttribute != null) { 520 try { 521 String jcrNameValue = attributes.getValue(nameAttribute.getNamespaceUri(), nameAttribute.getLocalName()); 522 nodeName = nameFactory.create(jcrNameValue); 523 } catch (ValueFormatException e) { 524 } 525 } 526 if (nodeName == null) nodeName = nameFactory.create(uri, localName, decoder); 527 startNode(nodeName); 528 529 // Set the type of the node ... 530 if (defaultPrimaryType != null) { 531 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType); 532 } 533 534 // Now, set each attribute as a property ... 535 for (int i = 0, len = attributes.getLength(); i != len; ++i) { 536 String attributeLocalName = attributes.getLocalName(i); 537 String attributeUri = attributes.getURI(i); 538 Name attributeName = null; 539 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) { 540 switch (this.attributeScoping) { 541 case INHERIT_ELEMENT_NAMESPACE: 542 attributeName = nameFactory.create(uri, attributeLocalName, decoder); 543 break; 544 case USE_DEFAULT_NAMESPACE: 545 attributeName = nameFactory.create(attributeLocalName, decoder); 546 break; 547 } 548 } else { 549 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder); 550 } 551 assert attributeName != null; 552 if (JcrLexicon.NAME.equals(attributeName)) { 553 // We don't want to record the "jcr:name" attribute since it won't match the node name ... 554 continue; 555 } 556 Object value = attributes.getValue(i); 557 if (JcrLexicon.PRIMARY_TYPE.equals(attributeName)) { 558 // Convert it to a name ... 559 value = nameFactory.create(value); 560 } 561 output.setProperty(currentPath, attributeName, attributes.getValue(i)); 562 } 563 } 564 565 /** 566 * {@inheritDoc} 567 * 568 * @see org.jboss.dna.graph.xml.XmlHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 569 */ 570 @Override 571 public void endElement( String uri, 572 String localName, 573 String name ) { 574 // Check if content still needs to be output 575 if (contentBuilder != null) endContent(); 576 577 // End the current node ... 578 endNode(); 579 } 580 581 /** 582 * <p> 583 * {@inheritDoc} 584 * </p> 585 * 586 * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException) 587 */ 588 @Override 589 public void warning( SAXParseException warning ) { 590 problems.addWarning(warning, XmlSequencerI18n.warningSequencingXmlDocument, warning); 591 } 592 593 /** 594 * {@inheritDoc} 595 * 596 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException) 597 */ 598 @Override 599 public void error( SAXParseException error ) { 600 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error); 601 } 602 603 /** 604 * {@inheritDoc} 605 * 606 * @see org.xml.sax.helpers.DefaultHandler#fatalError(org.xml.sax.SAXParseException) 607 */ 608 @Override 609 public void fatalError( SAXParseException error ) { 610 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error); 611 } 612 613 private class IndexedName { 614 615 Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>(); 616 617 IndexedName() { 618 } 619 } 620 }