001 /* 002 * JBoss, Home of Professional Open Source. 003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors 004 * as indicated by the @author tags. See the copyright.txt file in the 005 * distribution for a full listing of individual contributors. 006 * 007 * This is free software; you can redistribute it and/or modify it 008 * under the terms of the GNU Lesser General Public License as 009 * published by the Free Software Foundation; either version 2.1 of 010 * the License, or (at your option) any later version. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * You should have received a copy of the GNU Lesser General Public 018 * License along with this software; if not, write to the Free 019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 021 */ 022 package org.jboss.dna.sequencer.xml; 023 024 import java.util.ArrayList; 025 import java.util.HashMap; 026 import java.util.LinkedList; 027 import java.util.List; 028 import java.util.Map; 029 import org.jboss.dna.common.collection.Problems; 030 import org.jboss.dna.common.text.TextDecoder; 031 import org.jboss.dna.common.text.XmlNameEncoder; 032 import org.jboss.dna.common.util.CheckArg; 033 import org.jboss.dna.common.util.StringUtil; 034 import org.jboss.dna.graph.BasicExecutionContext; 035 import org.jboss.dna.graph.ExecutionContext; 036 import org.jboss.dna.graph.JcrLexicon; 037 import org.jboss.dna.graph.properties.Name; 038 import org.jboss.dna.graph.properties.NameFactory; 039 import org.jboss.dna.graph.properties.NamespaceRegistry; 040 import org.jboss.dna.graph.properties.Path; 041 import org.jboss.dna.graph.properties.PathFactory; 042 import org.jboss.dna.graph.properties.PropertyFactory; 043 import org.jboss.dna.graph.properties.ValueFormatException; 044 import org.jboss.dna.graph.properties.basic.LocalNamespaceRegistry; 045 import org.jboss.dna.graph.sequencers.SequencerContext; 046 import org.jboss.dna.graph.sequencers.SequencerOutput; 047 import org.xml.sax.Attributes; 048 import org.xml.sax.SAXParseException; 049 import org.xml.sax.ext.DefaultHandler2; 050 051 /** 052 * @author Randall Hauch 053 */ 054 public class XmlSequencerHandler extends DefaultHandler2 { 055 056 private final SequencerOutput output; 057 private final SequencerContext context; 058 059 /** 060 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16 061 * characters. 062 */ 063 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder(); 064 065 /** 066 * The default {@link XmlSequencer.AttributeScoping}. 067 */ 068 public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE; 069 070 /** 071 * The name of the attribute that should be used for the node name. 072 */ 073 protected final Name nameAttribute; 074 075 /** 076 * The default primary type. 077 */ 078 protected final Name defaultPrimaryType; 079 080 /** 081 * The cached reference to the graph's path factory. 082 */ 083 protected final PathFactory pathFactory; 084 085 /** 086 * The cached reference to the graph's name factory. 087 */ 088 protected final NameFactory nameFactory; 089 090 /** 091 * The cached reference to the graph's property factory. 092 */ 093 protected final PropertyFactory propertyFactory; 094 095 /** 096 * The cached reference to the graph's namespace registry. 097 */ 098 protected final NamespaceRegistry namespaceRegistry; 099 100 /** 101 * The TextDecoder that is used to decode the names. 102 */ 103 protected final TextDecoder decoder; 104 105 /** 106 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in 107 * sync with the namespaces in the XML document. 108 */ 109 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>(); 110 111 private final XmlSequencer.AttributeScoping attributeScoping; 112 113 /** 114 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never 115 * is shorter than that initial path. 116 */ 117 protected Path currentPath; 118 119 // Recursive map used to track the number of occurrences of names for elements under a particular path 120 private Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>(); 121 122 // The stack of recursive maps being processed, with the head entry being the map for the current path 123 private final LinkedList<Map<Name, List<IndexedName>>> nameToIndexedNamesMapStack = new LinkedList<Map<Name, List<IndexedName>>>(); 124 125 private String currentEntityName; 126 private StringBuilder cDataContent; 127 private StringBuilder contentBuilder; 128 private final Problems problems; 129 private final Map<String, String> entityValues = new HashMap<String, String>(); 130 131 /** 132 * @param output 133 * @param context 134 * @param nameAttribute 135 * @param defaultPrimaryType 136 * @param textDecoder 137 * @param scoping 138 */ 139 XmlSequencerHandler( SequencerOutput output, 140 SequencerContext context, 141 Name nameAttribute, 142 Name defaultPrimaryType, 143 TextDecoder textDecoder, 144 XmlSequencer.AttributeScoping scoping ) { 145 CheckArg.isNotNull(output, "output"); 146 CheckArg.isNotNull(context, "context"); 147 148 // Use the execution context ... 149 this.output = output; 150 this.context = context; 151 this.problems = context.getProblems(); 152 assert this.problems != null; 153 154 this.nameAttribute = nameAttribute; 155 this.defaultPrimaryType = defaultPrimaryType; 156 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER; 157 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING; 158 159 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ... 160 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry()); 161 final ExecutionContext localContext = new BasicExecutionContext(this.context, namespaceRegistry); 162 163 // Set up references to frequently-used objects in the context ... 164 this.nameFactory = localContext.getValueFactories().getNameFactory(); 165 this.pathFactory = localContext.getValueFactories().getPathFactory(); 166 this.propertyFactory = localContext.getPropertyFactory(); 167 this.namespaceRegistry = localContext.getNamespaceRegistry(); 168 assert this.nameFactory != null; 169 assert this.pathFactory != null; 170 assert this.propertyFactory != null; 171 assert this.namespaceRegistry != null; 172 173 // Set up the initial path ... 174 this.currentPath = this.pathFactory.createRelativePath(); 175 assert this.currentPath != null; 176 } 177 178 private void startNode( Name name ) { 179 // Check if content still needs to be output 180 if (contentBuilder != null) endContent(); 181 // Add name to list of indexed names for this element to ensure we use the correct index (which is the size of the 182 // list) 183 List<IndexedName> indexedNames = nameToIndexedNamesMap.get(name); 184 if (indexedNames == null) { 185 indexedNames = new ArrayList<IndexedName>(); 186 nameToIndexedNamesMap.put(name, indexedNames); 187 } 188 IndexedName indexedName = new IndexedName(); 189 indexedNames.add(indexedName); 190 // Add element name and the appropriate index to the path. 191 // Per the JCR spec, the index must be relative to same-name sibling nodes 192 currentPath = pathFactory.create(currentPath, name, indexedNames.size()).getNormalizedPath(); 193 // currentPath = currentPath.getNormalizedPath(); 194 // Add the indexed name map to the stack and set the current map to the new element's map 195 nameToIndexedNamesMapStack.addFirst(nameToIndexedNamesMap); 196 nameToIndexedNamesMap = indexedName.nameToIndexedNamesMap; 197 } 198 199 private void endNode() { 200 // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory 201 currentPath = currentPath.getParent(); 202 currentPath = currentPath.getNormalizedPath(); 203 nameToIndexedNamesMap.clear(); 204 nameToIndexedNamesMap = nameToIndexedNamesMapStack.removeFirst(); 205 } 206 207 /** 208 * See if there is any element content that needs to be completed. 209 */ 210 protected void endContent() { 211 // Process the content of the element ... 212 String content = StringUtil.normalize(contentBuilder.toString()); 213 // Null-out builder to setup for subsequent content. 214 // Must be done before call to startElement below to prevent infinite loop. 215 contentBuilder = null; 216 // Skip if nothing in content but whitespace 217 if (content.length() > 0) { 218 // Create separate node for each content entry since entries can be interspersed amongst child elements 219 startNode(DnaXmlLexicon.ELEMENT_CONTENT); 220 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.ELEMENT_CONTENT); 221 output.setProperty(currentPath, DnaXmlLexicon.ELEMENT_CONTENT, content); 222 endNode(); 223 } 224 } 225 226 /** 227 * <p> 228 * {@inheritDoc} 229 * </p> 230 * 231 * @see org.xml.sax.helpers.DefaultHandler#startDocument() 232 */ 233 @Override 234 public void startDocument() { 235 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.DOCUMENT); 236 } 237 238 /** 239 * <p> 240 * {@inheritDoc} 241 * </p> 242 * 243 * @see org.xml.sax.ext.DefaultHandler2#startDTD(java.lang.String, java.lang.String, java.lang.String) 244 */ 245 @Override 246 public void startDTD( String name, 247 String publicId, 248 String systemId ) { 249 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 250 output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId); 251 output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId); 252 } 253 254 /** 255 * <p> 256 * {@inheritDoc} 257 * </p> 258 * 259 * @see org.xml.sax.ext.DefaultHandler2#externalEntityDecl(java.lang.String, java.lang.String, java.lang.String) 260 */ 261 @Override 262 public void externalEntityDecl( String name, 263 String publicId, 264 String systemId ) { 265 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 266 startNode(DnaDtdLexicon.ENTITY); 267 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY); 268 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 269 if (publicId != null) output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId); 270 if (systemId != null) output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId); 271 endNode(); 272 } 273 274 /** 275 * {@inheritDoc} 276 * 277 * @see org.xml.sax.ext.DefaultHandler2#internalEntityDecl(java.lang.String, java.lang.String) 278 */ 279 @Override 280 public void internalEntityDecl( String name, 281 String value ) { 282 // Add "synthetic" entity container to path to help prevent name collisions with XML elements 283 startNode(DnaDtdLexicon.ENTITY); 284 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY); 285 output.setProperty(currentPath, DnaDtdLexicon.NAME, name); 286 output.setProperty(currentPath, DnaDtdLexicon.VALUE, value); 287 // Record the name/value pair ... 288 entityValues.put(name, value); 289 endNode(); 290 } 291 292 /** 293 * <p> 294 * {@inheritDoc} 295 * </p> 296 * 297 * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String, java.lang.String) 298 */ 299 @Override 300 public void processingInstruction( String target, 301 String data ) { 302 // Output separate nodes for each instruction since multiple are allowed 303 startNode(DnaXmlLexicon.PROCESSING_INSTRUCTION); 304 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.PROCESSING_INSTRUCTION); 305 output.setProperty(currentPath, DnaXmlLexicon.TARGET, target.trim()); 306 if (data != null) { 307 output.setProperty(currentPath, DnaXmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim()); 308 } 309 endNode(); 310 } 311 312 /** 313 * {@inheritDoc} 314 * <p> 315 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix 316 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create 317 * {@link Name} objects, no attempt is made to match the XML namespace prefixes. 318 * </p> 319 * 320 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String) 321 */ 322 @Override 323 public void startPrefixMapping( String prefix, 324 String uri ) { 325 assert uri != null; 326 // Add the prefix to the stack ... 327 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 328 if (prefixStack == null) { 329 prefixStack = new LinkedList<String>(); 330 this.prefixStackByUri.put(uri, prefixStack); 331 } 332 prefixStack.addFirst(prefix); 333 334 // If the namespace is already registered, then we'll have to register it in the context's registry, too. 335 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) { 336 // The namespace is not already registered (locally or in the context's registry), so we have to 337 // register it with the context's registry (which the local register then inherits). 338 NamespaceRegistry contextRegistry = context.getNamespaceRegistry(); 339 if (contextRegistry.getNamespaceForPrefix(prefix) != null) { 340 // The prefix is already bound, so register and generate a unique prefix 341 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true); 342 // Now register locally with the supplied prefix ... 343 namespaceRegistry.register(prefix, uri); 344 } else { 345 context.getNamespaceRegistry().register(prefix, uri); 346 } 347 } else { 348 // It is already registered, but re-register it locally using the supplied prefix ... 349 namespaceRegistry.register(prefix, uri); 350 } 351 } 352 353 /** 354 * {@inheritDoc} 355 * 356 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String) 357 */ 358 @Override 359 public void endPrefixMapping( String prefix ) { 360 assert prefix != null; 361 // Get the current URI for this prefix ... 362 String uri = namespaceRegistry.getNamespaceForPrefix(prefix); 363 assert uri != null; 364 365 // Get the previous prefix from the stack ... 366 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri); 367 assert prefixStack != null; 368 assert !prefixStack.isEmpty(); 369 String existingPrefix = prefixStack.removeFirst(); 370 assert prefix.equals(existingPrefix); 371 372 // If there are no previous prefixes, then remove the mapping ... 373 if (prefixStack.isEmpty()) { 374 namespaceRegistry.unregister(uri); 375 prefixStackByUri.remove(uri); 376 } else { 377 String previous = prefixStack.getFirst(); 378 namespaceRegistry.register(previous, uri); 379 } 380 } 381 382 /** 383 * <p> 384 * {@inheritDoc} 385 * </p> 386 * 387 * @see org.xml.sax.ext.DefaultHandler2#startEntity(java.lang.String) 388 */ 389 @Override 390 public void startEntity( String name ) { 391 // Record that we've started an entity by capturing the name of the entity ... 392 currentEntityName = name; 393 } 394 395 /** 396 * <p> 397 * {@inheritDoc} 398 * </p> 399 * 400 * @see org.xml.sax.ext.DefaultHandler2#endEntity(java.lang.String) 401 */ 402 @Override 403 public void endEntity( String name ) { 404 // currentEntityName is nulled in 'characters(...)', not here. 405 // See DNA-231 for an issue related to this 406 } 407 408 /** 409 * <p> 410 * {@inheritDoc} 411 * </p> 412 * 413 * @see org.xml.sax.ext.DefaultHandler2#startCDATA() 414 */ 415 @Override 416 public void startCDATA() { 417 // CDATA sections can start in the middle of element content, so there may already be some 418 // element content already processed ... 419 if (contentBuilder != null) endContent(); 420 421 // Prepare builder for concatenating consecutive lines of CDATA 422 cDataContent = new StringBuilder(); 423 } 424 425 /** 426 * {@inheritDoc} 427 * 428 * @see org.xml.sax.ext.DefaultHandler2#endCDATA() 429 */ 430 @Override 431 public void endCDATA() { 432 // Output CDATA built in characters() method 433 startNode(DnaXmlLexicon.CDATA); 434 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType); 435 output.setProperty(currentPath, DnaXmlLexicon.CDATA_CONTENT, cDataContent.toString()); 436 endNode(); 437 // Null-out builder to free memory 438 cDataContent = null; 439 } 440 441 /** 442 * {@inheritDoc} 443 * 444 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int) 445 */ 446 @Override 447 public void characters( char[] ch, 448 int start, 449 int length ) { 450 String content = String.valueOf(ch, start, length); 451 if (cDataContent != null) { 452 // Processing the characters in the CDATA, so add to the builder 453 cDataContent.append(ch, start, length); 454 // Text within builder will be output at the end of CDATA 455 } else { 456 if (contentBuilder == null) { 457 // This is the first line of content, so we have to create the StringBuilder ... 458 contentBuilder = new StringBuilder(); 459 } 460 if (currentEntityName != null) { 461 // This is an entity reference, so rather than use the entity value characters (the content passed 462 // into this method), we want to keep the entity reference ... 463 contentBuilder.append('&').append(currentEntityName).append(';'); 464 465 // Normally, 'characters' is called with just the entity replacement characters, 466 // and is called between 'startEntity' and 'endEntity'. However, per DNA-231, some JVMs 467 // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the 468 // content passed to the 'characters' call not only includes the entity replacement characters 469 // followed by other content. Look for this condition ... 470 String entityValue = entityValues.get(currentEntityName); 471 if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) { 472 // Per DNA-231, there's extra content after the entity value. So replace the entity value in the 473 // content with the entity reference (not the replacement characters), and add the extra content ... 474 String extraContent = content.substring(entityValue.length()); 475 contentBuilder.append(extraContent); 476 } 477 // We're done reading the entity characters, so null it out 478 currentEntityName = null; 479 } else { 480 // Just append the content normally ... 481 contentBuilder.append(content); 482 } 483 // Text within builder will be output when another element or CDATA is encountered 484 } 485 } 486 487 /** 488 * {@inheritDoc} 489 * 490 * @see org.xml.sax.ext.DefaultHandler2#comment(char[], int, int) 491 */ 492 @Override 493 public void comment( char[] ch, 494 int start, 495 int length ) { 496 // Output separate nodes for each comment since multiple are allowed 497 startNode(DnaXmlLexicon.COMMENT); 498 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.COMMENT); 499 output.setProperty(currentPath, DnaXmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim()); 500 endNode(); 501 } 502 503 /** 504 * {@inheritDoc} 505 * 506 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, 507 * org.xml.sax.Attributes) 508 */ 509 @Override 510 public void startElement( String uri, 511 String localName, 512 String name, 513 Attributes attributes ) { 514 assert localName != null; 515 516 // Create the node with the name built from the element's name ... 517 Name nodeName = null; 518 if (nameAttribute != null) { 519 try { 520 String jcrNameValue = attributes.getValue(nameAttribute.getNamespaceUri(), nameAttribute.getLocalName()); 521 nodeName = nameFactory.create(jcrNameValue); 522 } catch (ValueFormatException e) { 523 } 524 } 525 if (nodeName == null) nodeName = nameFactory.create(uri, localName, decoder); 526 startNode(nodeName); 527 528 // Set the type of the node ... 529 if (defaultPrimaryType != null) { 530 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType); 531 } 532 533 // Now, set each attribute as a property ... 534 for (int i = 0, len = attributes.getLength(); i != len; ++i) { 535 String attributeLocalName = attributes.getLocalName(i); 536 String attributeUri = attributes.getURI(i); 537 Name attributeName = null; 538 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) { 539 switch (this.attributeScoping) { 540 case INHERIT_ELEMENT_NAMESPACE: 541 attributeName = nameFactory.create(uri, attributeLocalName, decoder); 542 break; 543 case USE_DEFAULT_NAMESPACE: 544 attributeName = nameFactory.create(attributeLocalName, decoder); 545 break; 546 } 547 } else { 548 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder); 549 } 550 assert attributeName != null; 551 if (JcrLexicon.NAME.equals(attributeName)) { 552 // We don't want to record the "jcr:name" attribute since it won't match the node name ... 553 continue; 554 } 555 Object value = attributes.getValue(i); 556 if (JcrLexicon.PRIMARY_TYPE.equals(attributeName)) { 557 // Convert it to a name ... 558 value = nameFactory.create(value); 559 } 560 output.setProperty(currentPath, attributeName, attributes.getValue(i)); 561 } 562 } 563 564 /** 565 * {@inheritDoc} 566 * 567 * @see org.jboss.dna.graph.xml.XmlHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 568 */ 569 @Override 570 public void endElement( String uri, 571 String localName, 572 String name ) { 573 // Check if content still needs to be output 574 if (contentBuilder != null) endContent(); 575 576 // End the current node ... 577 endNode(); 578 } 579 580 /** 581 * <p> 582 * {@inheritDoc} 583 * </p> 584 * 585 * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException) 586 */ 587 @Override 588 public void warning( SAXParseException warning ) { 589 problems.addWarning(warning, XmlSequencerI18n.warningSequencingXmlDocument, warning); 590 } 591 592 /** 593 * {@inheritDoc} 594 * 595 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException) 596 */ 597 @Override 598 public void error( SAXParseException error ) { 599 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error); 600 } 601 602 /** 603 * {@inheritDoc} 604 * 605 * @see org.xml.sax.helpers.DefaultHandler#fatalError(org.xml.sax.SAXParseException) 606 */ 607 @Override 608 public void fatalError( SAXParseException error ) { 609 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error); 610 } 611 612 private class IndexedName { 613 614 Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>(); 615 616 IndexedName() { 617 } 618 } 619 }