001    /*
002     * JBoss DNA (http://www.jboss.org/dna)
003     * See the COPYRIGHT.txt file distributed with this work for information
004     * regarding copyright ownership.  Some portions may be licensed
005     * to Red Hat, Inc. under one or more contributor license agreements.
006     * See the AUTHORS.txt file in the distribution for a full listing of 
007     * individual contributors.
008     *
009     * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010     * is licensed to you under the terms of the GNU Lesser General Public License as
011     * published by the Free Software Foundation; either version 2.1 of
012     * the License, or (at your option) any later version.
013     * 
014     * JBoss DNA is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017     * Lesser General Public License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this software; if not, write to the Free
021     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023     */
024    package org.jboss.dna.common.text;
025    
026    import java.text.CharacterIterator;
027    import java.text.StringCharacterIterator;
028    import java.util.HashMap;
029    import java.util.Map;
030    
031    /**
032     * An encoder useful for converting text to be used within XML attribute values.
033     * The following translations will be performed:
034     * <table cellspacing="0" cellpadding="1" border="1">
035     * <tr>
036     * <th>Raw (Unencoded)<br/>Character</th>
037     * <th>Translated (Encoded)<br/>Entity</th>
038     * </tr>
039     * <tr>
040     * <td> &amp; </td>
041     * <td> &amp;amp; </td>
042     * </tr>
043     * <tr>
044     * <td> &lt; </td>
045     * <td> &amp;lt; </td>
046     * </tr>
047     * <tr>
048     * <td> &gt; </td>
049     * <td> &amp;gt; </td>
050     * </tr>
051     * <tr>
052     * <td> &quot; </td>
053     * <td> &amp;quot; </td>
054     * </tr>
055     * <tr>
056     * <td> &#039; </td>
057     * <td> &amp;#039; </td>
058     * </tr>
059     * <tr>
060     * <td>All Others</td>
061     * <td>No Translation</td>
062     * </tr>
063     * </table>
064     * </p>
065     */
066    public class XmlValueEncoder implements TextEncoder, TextDecoder {
067        
068        private static final Map<String, Character> SPECIAL_ENTITIES;
069        
070        static {
071            SPECIAL_ENTITIES = new HashMap<String, Character>();
072            
073            SPECIAL_ENTITIES.put("quot", '"');
074            SPECIAL_ENTITIES.put("gt", '>');
075            SPECIAL_ENTITIES.put("lt", '<');
076            SPECIAL_ENTITIES.put("amp", '&');
077            
078        }
079        
080        /**
081         * {@inheritDoc}
082         *
083         * @see org.jboss.dna.common.text.TextEncoder#encode(java.lang.String)
084         */
085        public String encode( String text ) {
086            if (text == null) return null;
087            StringBuilder sb = new StringBuilder();
088            CharacterIterator iter = new StringCharacterIterator(text);
089            for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
090                switch (c) {
091                    case '&':
092                        sb.append("&amp;");
093                        break;
094                    case '"':
095                        sb.append("&quot;");
096                        break;
097                    case '<':
098                        sb.append("&lt;");
099                        break;
100                    case '>':
101                        sb.append("&gt;");
102                        break;
103                    case '\'':
104                        sb.append("&#039;");
105                        break;
106                    default:
107                        sb.append(c);
108                }
109            }
110            return sb.toString();
111        }
112    
113        /**
114         * {@inheritDoc}
115         *
116         * @see org.jboss.dna.common.text.TextDecoder#decode(java.lang.String)
117         */
118        public String decode( String encodedText ) {
119            if (encodedText == null) return null;
120            StringBuilder sb = new StringBuilder();
121            CharacterIterator iter = new StringCharacterIterator(encodedText);
122            for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
123                if (c == '&') {
124                    int index = iter.getIndex();
125                    
126                    do {
127                        c = iter.next();
128                    }
129                    while (c != CharacterIterator.DONE && c != ';');
130    
131                    // We found a closing semicolon
132                    if (c == ';') {
133                        String s = encodedText.substring(index + 1, iter.getIndex());
134                        
135                        if (SPECIAL_ENTITIES.containsKey(s)) {
136                            sb.append(SPECIAL_ENTITIES.get(s));
137                            continue;
138                            
139                        }
140                        
141                        if (s.length() > 0 && s.charAt(0) == '#') {
142                            try {
143                                sb.append((char) Short.parseShort(s.substring(1, s.length())));
144                                continue;
145                            }
146                            catch (NumberFormatException nfe) {
147                                // This is possible in malformed encodings, but let it fall through
148                            }
149                        }
150                    }
151                    
152                    // Malformed encoding, restore state and pass poorly encoded data back
153                    c = '&';
154                    iter.setIndex(index);                            
155                }
156    
157                sb.append(c);
158    
159            }
160            return sb.toString();
161        }
162    }