/*
 * JBoss, Home of Professional Open Source.
 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
 * as indicated by the @author tags. See the copyright.txt file in the
 * distribution for a full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.jboss.dna.common.text;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;

/**
 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL (
 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} should be used for such purposes).
 * <p>
 * Every character that is not in the unreserved set is written as the UTF-8 octets of that character, each octet escaped as
 * <code>%</code> followed by two lowercase hexadecimal digits. Decoding accepts upper or lower case hexadecimal digits and
 * reads each run of consecutive escapes as UTF-8; a run that is not well-formed UTF-8 is read one octet per character
 * (ISO-8859-1) instead, so that text such as <code>%e9</code> is still understood.
 * </p>
 * <p>
 * Both directions scan the text by index rather than with a {@link StringCharacterIterator}, because the
 * {@link CharacterIterator#DONE} sentinel is the legal character U+FFFF and text containing it would otherwise be cut short.
 * </p>
 *
 * @author Randall Hauch
 */
public class UrlEncoder implements TextEncoder, TextDecoder {

    /**
     * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
     * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
     *
     * <pre>
     * unreserved  = alphanum | mark
     * mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     * </pre>
     *
     * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
     * is being used in a context that does not allow the unescaped character to appear.
     */
    private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);

    /**
     * The unreserved characters plus the forward slash; used as the pass-through set when slashes are not to be encoded.
     */
    private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;

    /**
     * The character that introduces an escape sequence in encoded text.
     */
    public static final char ESCAPE_CHARACTER = '%';

    /**
     * The character set whose octets are escaped by {@link #encode(String)} and read back by {@link #decode(String)}.
     */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    static {
        RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
        RFC2396_UNRESERVED_CHARACTERS.set('-');
        RFC2396_UNRESERVED_CHARACTERS.set('_');
        RFC2396_UNRESERVED_CHARACTERS.set('.');
        RFC2396_UNRESERVED_CHARACTERS.set('!');
        RFC2396_UNRESERVED_CHARACTERS.set('~');
        RFC2396_UNRESERVED_CHARACTERS.set('*');
        RFC2396_UNRESERVED_CHARACTERS.set('\'');
        RFC2396_UNRESERVED_CHARACTERS.set('(');
        RFC2396_UNRESERVED_CHARACTERS.set(')');

        RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
        RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
    }

    /**
     * Whether {@link #encode(String)} escapes the forward slash; <code>true</code> unless changed through
     * {@link #setSlashEncoded(boolean)}.
     */
    private boolean slashEncoded = true;

    /**
     * {@inheritDoc}
     * <p>
     * Unreserved characters (and the forward slash, when {@link #isSlashEncoded()} is <code>false</code>) are copied as-is.
     * Everything else is replaced by the escaped UTF-8 octets of the character, using lowercase hexadecimal digits. A surrogate
     * that is not part of a valid pair cannot be represented in UTF-8 and is written as the charset's replacement octet.
     * </p>
     *
     * @param text the text to encode; may be null
     * @return the encoded text, null if <code>text</code> is null, or <code>text</code> itself if it is empty
     */
    public String encode( String text ) {
        if (text == null) return null;
        if (text.length() == 0) return text;
        final BitSet safeChars = isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
        final int length = text.length();
        final StringBuilder result = new StringBuilder(length + 16);
        int index = 0;
        while (index < length) {
            final char c = text.charAt(index);
            if (safeChars.get(c)) {
                // Safe character, so just pass through ...
                result.append(c);
                ++index;
                continue;
            }
            // Gather the whole run of unsafe characters. Surrogates are never safe, so a surrogate pair always ends up
            // inside a single run and is converted to UTF-8 as one code point ...
            int runEnd = index + 1;
            while (runEnd < length && !safeChars.get(text.charAt(runEnd))) {
                ++runEnd;
            }
            final ByteBuffer utf8 = UTF_8.encode(CharBuffer.wrap(text, index, runEnd));
            while (utf8.hasRemaining()) {
                final int octet = utf8.get() & 0xFF;
                result.append(ESCAPE_CHARACTER);
                // Character.forDigit already yields lowercase letters for the values 10 through 15 ...
                result.append(Character.forDigit(octet >> 4, 16));
                result.append(Character.forDigit(octet & 0x0F, 16));
            }
            index = runEnd;
        }
        return result.toString();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Each run of consecutive <code>%XX</code> escapes is converted to octets and read as UTF-8, falling back to one character
     * per octet when the run is not well-formed UTF-8. An escape character that is not followed by two ASCII hexadecimal digits
     * is copied literally, and scanning resumes with the character immediately after it.
     * </p>
     *
     * @param encodedText the text to decode; may be null
     * @return the decoded text, null if <code>encodedText</code> is null, or <code>encodedText</code> itself if it is empty
     */
    public String decode( String encodedText ) {
        if (encodedText == null) return null;
        if (encodedText.length() == 0) return encodedText;
        final int length = encodedText.length();
        final StringBuilder result = new StringBuilder(length);
        byte[] octets = null;
        int index = 0;
        while (index < length) {
            final char c = encodedText.charAt(index);
            if (c != ESCAPE_CHARACTER) {
                result.append(c);
                ++index;
                continue;
            }
            // Sized for the longest run that can start at the first escape character; every later run is shorter ...
            if (octets == null) octets = new byte[(length - index) / 3];
            int count = 0;
            while (index + 2 < length && encodedText.charAt(index) == ESCAPE_CHARACTER) {
                final int high = hexValue(encodedText.charAt(index + 1));
                final int low = hexValue(encodedText.charAt(index + 2));
                if (high < 0 || low < 0) break;
                octets[count++] = (byte)((high << 4) | low);
                index += 3;
            }
            if (count == 0) {
                // Not a valid escape sequence, so keep the escape character and rescan what follows it ...
                result.append(c);
                ++index;
            } else {
                appendDecodedOctets(result, octets, count);
            }
        }
        return result.toString();
    }

    /**
     * Obtain the numeric value of an ASCII hexadecimal digit.
     *
     * @param c the character to interpret
     * @return the value from 0 to 15, or -1 if the character is not one of <code>0-9</code>, <code>a-f</code> or
     *         <code>A-F</code>
     */
    private static int hexValue( char c ) {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    }

    /**
     * Append the characters represented by a run of unescaped octets.
     *
     * @param result the builder to append to
     * @param octets the buffer holding the octets
     * @param count the number of octets, starting at index 0, that belong to the run
     */
    private static void appendDecodedOctets( StringBuilder result,
                                             byte[] octets,
                                             int count ) {
        boolean onlyAscii = true;
        for (int i = 0; i < count; ++i) {
            if (octets[i] < 0) {
                onlyAscii = false;
                break;
            }
        }
        if (!onlyAscii) {
            try {
                final CharBuffer decoded = UTF_8.newDecoder()
                                                .onMalformedInput(CodingErrorAction.REPORT)
                                                .onUnmappableCharacter(CodingErrorAction.REPORT)
                                                .decode(ByteBuffer.wrap(octets, 0, count));
                result.append(decoded);
                return;
            } catch (CharacterCodingException notUtf8) {
                // Not well-formed UTF-8, so fall through and read the run one octet per character ...
            }
        }
        for (int i = 0; i < count; ++i) {
            result.append((char)(octets[i] & 0xFF));
        }
    }

    /**
     * Determine whether {@link #encode(String)} escapes the forward slash.
     *
     * @return true if a forward slash is escaped, or false if it is passed through unchanged
     */
    public boolean isSlashEncoded() {
        return this.slashEncoded;
    }

    /**
     * Set whether {@link #encode(String)} escapes the forward slash.
     *
     * @param slashEncoded true if a forward slash should be escaped, or false if it should be passed through unchanged
     * @return this object, for method chaining
     */
    public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
        this.slashEncoded = slashEncoded;
        return this;
    }

}