001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.common.text; 025 026 import java.text.CharacterIterator; 027 import java.text.StringCharacterIterator; 028 import java.util.BitSet; 029 030 /** 031 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a 032 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL ({@link java.net.URLEncoder} 033 * and {@link java.net.URLDecoder} should be used for such purposes). 034 * 035 * @author Randall Hauch 036 */ 037 public class UrlEncoder implements TextEncoder, TextDecoder { 038 039 /** 040 * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and 041 * lower case letters, decimal digits, and a limited set of punctuation marks and symbols. 042 * 043 * <pre> 044 * unreserved = alphanum | mark 045 * mark = "-" | "_" | "." | "!" | "˜" | "*" | "'" | "(" | ")" 046 * </pre> 047 * 048 * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI 049 * is being used in a context that does not allow the unescaped character to appear. 050 */ 051 private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256); 052 private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS; 053 054 public static final char ESCAPE_CHARACTER = '%'; 055 056 static { 057 RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1); 058 RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1); 059 RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1); 060 RFC2396_UNRESERVED_CHARACTERS.set('-'); 061 RFC2396_UNRESERVED_CHARACTERS.set('_'); 062 RFC2396_UNRESERVED_CHARACTERS.set('.'); 063 RFC2396_UNRESERVED_CHARACTERS.set('!'); 064 RFC2396_UNRESERVED_CHARACTERS.set('~'); 065 RFC2396_UNRESERVED_CHARACTERS.set('*'); 066 RFC2396_UNRESERVED_CHARACTERS.set('\''); 067 RFC2396_UNRESERVED_CHARACTERS.set('('); 068 RFC2396_UNRESERVED_CHARACTERS.set(')'); 069 070 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone(); 071 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/'); 072 } 073 074 private boolean slashEncoded = true; 075 076 /** 077 * {@inheritDoc} 078 */ 079 public String encode( String text ) { 080 if (text == null) return null; 081 if (text.length() == 0) return text; 082 final BitSet safeChars = isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS; 083 final StringBuilder result = new StringBuilder(); 084 final CharacterIterator iter = new StringCharacterIterator(text); 085 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) { 086 if (safeChars.get(c)) { 087 // Safe character, so just pass through ... 088 result.append(c); 089 } else { 090 // The character is not a safe character, and must be escaped ... 091 result.append(ESCAPE_CHARACTER); 092 result.append(Character.toLowerCase(Character.forDigit(c / 16, 16))); 093 result.append(Character.toLowerCase(Character.forDigit(c % 16, 16))); 094 } 095 } 096 return result.toString(); 097 } 098 099 /** 100 * {@inheritDoc} 101 */ 102 public String decode( String encodedText ) { 103 if (encodedText == null) return null; 104 if (encodedText.length() == 0) return encodedText; 105 final StringBuilder result = new StringBuilder(); 106 final CharacterIterator iter = new StringCharacterIterator(encodedText); 107 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) { 108 if (c == ESCAPE_CHARACTER) { 109 boolean foundEscapedCharacter = false; 110 // Found the first character in a potential escape sequence, so grab the next two characters ... 111 char hexChar1 = iter.next(); 112 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE; 113 if (hexChar2 != CharacterIterator.DONE) { 114 // We found two more characters, but ensure they form a valid hexadecimal number ... 115 int hexNum1 = Character.digit(hexChar1, 16); 116 int hexNum2 = Character.digit(hexChar2, 16); 117 if (hexNum1 > -1 && hexNum2 > -1) { 118 foundEscapedCharacter = true; 119 result.append((char)(hexNum1 * 16 + hexNum2)); 120 } 121 } 122 if (!foundEscapedCharacter) { 123 result.append(c); 124 if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1); 125 if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2); 126 } 127 } else { 128 result.append(c); 129 } 130 } 131 return result.toString(); 132 } 133 134 /** 135 * @return slashEncoded 136 */ 137 public boolean isSlashEncoded() { 138 return this.slashEncoded; 139 } 140 141 /** 142 * @param slashEncoded Sets slashEncoded to the specified value. 143 * @return this object, for method chaining 144 */ 145 public UrlEncoder setSlashEncoded( boolean slashEncoded ) { 146 this.slashEncoded = slashEncoded; 147 return this; 148 } 149 150 }