001 /* 002 * JBoss, Home of Professional Open Source. 003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors 004 * as indicated by the @author tags. See the copyright.txt file in the 005 * distribution for a full listing of individual contributors. 006 * 007 * This is free software; you can redistribute it and/or modify it 008 * under the terms of the GNU Lesser General Public License as 009 * published by the Free Software Foundation; either version 2.1 of 010 * the License, or (at your option) any later version. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * You should have received a copy of the GNU Lesser General Public 018 * License along with this software; if not, write to the Free 019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 021 */ 022 package org.jboss.dna.common.util; 023 024 /** 025 * <p> 026 * Encodes and decodes to and from Base64 notation. 027 * </p> 028 * <p> 029 * Homepage: <a href="http://iharder.net/base64">http://iharder.net/base64</a>. 030 * </p> 031 * <p> 032 * The <tt>options</tt> parameter, which appears in a few places, is used to pass several pieces of information to the encoder. In 033 * the "higher level" methods such as encodeBytes( bytes, options ) the options parameter can be used to indicate such things as 034 * first gzipping the bytes before encoding them, not inserting linefeeds (though that breaks strict Base64 compatibility), and 035 * encoding using the URL-safe and Ordered dialects. 036 * </p> 037 * <p> 038 * The constants defined in Base64 can be OR-ed together to combine options, so you might make a call like this: 039 * </p> 040 * <code>String encoded = Base64.encodeBytes( mybytes, Base64.GZIP | Base64.DONT_BREAK_LINES );</code> 041 * <p> 042 * to compress the data before encoding it and then making the output have no newline characters. 043 * </p> 044 * <p> 045 * Change Log: 046 * </p> 047 * <ul> 048 * <li>v2.2.2 - Fixed encodeFileToFile and decodeFileToFile to use the Base64.InputStream class to encode and decode on the fly 049 * which uses less memory than encoding/decoding an entire file into memory before writing.</li> 050 * <li>v2.2.1 - Fixed bug using URL_SAFE and ORDERED encodings. Fixed bug when using very small files (~< 40 bytes).</li> 051 * <li>v2.2 - Added some helper methods for encoding/decoding directly from one file to the next. Also added a main() method to 052 * support command line encoding/decoding from one file to the next. Also added these Base64 dialects: 053 * <ol> 054 * <li>The default is RFC3548 format.</li> 055 * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.URLSAFE_FORMAT) generates URL and file name friendly format as described in 056 * Section 4 of RFC3548. http://www.faqs.org/rfcs/rfc3548.html</li> 057 * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.ORDERED_FORMAT) generates URL and file name friendly format that preserves 058 * lexical ordering as described in http://www.faqs.org/qa/rfcc-1940.html</li> 059 * </ol> 060 * Special thanks to Jim Kellerman at <a href="http://www.powerset.com/">http://www.powerset.com/</a> for contributing the new 061 * Base64 dialects.</li> 062 * <li>v2.1 - Cleaned up javadoc comments and unused variables and methods. Added some convenience methods for reading and writing 063 * to and from files.</li> 064 * <li>v2.0.2 - Now specifies UTF-8 encoding in places where the code fails on systems with other encodings (like EBCDIC).</li> 065 * <li>v2.0.1 - Fixed an error when decoding a single byte, that is, when the encoded data was a single byte.</li> 066 * <li>v2.0 - I got rid of methods that used booleans to set options. Now everything is more consolidated and cleaner. The code 067 * now detects when data that's being decoded is gzip-compressed and will decompress it automatically. Generally things are 068 * cleaner. You'll probably have to change some method calls that you were making to support the new options format (<tt>int</tt>s 069 * that you "OR" together).</li> 070 * <li>v1.5.1 - Fixed bug when decompressing and decoding to a byte[] using <tt>decode( String s, boolean gzipCompressed )</tt>. 071 * Added the ability to "suspend" encoding in the Output Stream so you can turn on and off the encoding if you need to embed 072 * base64 data in an otherwise "normal" stream (like an XML file).</li> 073 * <li>v1.5 - Output stream pases on flush() command but doesn't do anything itself. This helps when using GZIP streams. Added the 074 * ability to GZip-compress objects before encoding them.</li> 075 * <li>v1.4 - Added helper methods to read/write files.</li> 076 * <li>v1.3.6 - Fixed OutputStream.flush() so that 'position' is reset.</li> 077 * <li>v1.3.5 - Added flag to turn on and off line breaks. Fixed bug in input stream where last buffer being read, if not 078 * completely full, was not returned.</li> 079 * <li>v1.3.4 - Fixed when "improperly padded stream" error was thrown at the wrong time.</li> 080 * <li>v1.3.3 - Fixed I/O streams which were totally messed up.</li> 081 * </ul> 082 * <p> 083 * I am placing this code in the Public Domain. Do with it as you will. This software comes with no guarantees or warranties but 084 * with plenty of well-wishing instead! Please visit <a href="http://iharder.net/base64">http://iharder.net/base64</a> 085 * periodically to check for updates or to contribute improvements. 086 * </p> 087 * 088 * @author Robert Harder 089 * @author rob@iharder.net 090 * @version 2.2.2 091 */ 092 public class Base64 { 093 094 /* ******** P R I V A T E F I E L D S ******** */ 095 096 /** Maximum line length (76) of Base64 output. */ 097 private final static int MAX_LINE_LENGTH = 76; 098 099 /** The equals sign (=) as a byte. */ 100 private final static byte EQUALS_SIGN = (byte)'='; 101 102 /** The new line character (\n) as a byte. */ 103 private final static byte NEW_LINE = (byte)'\n'; 104 105 /** Preferred encoding. */ 106 private final static String PREFERRED_ENCODING = "UTF-8"; 107 108 private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding 109 private final static byte EQUALS_SIGN_ENC = -1; // Indicates equals sign in encoding 110 111 /* ******** S T A N D A R D B A S E 6 4 A L P H A B E T ******** */ 112 113 /** The 64 valid Base64 values. */ 114 /* Host platform me be something funny like EBCDIC, so we hardcode these values. */ 115 private final static byte[] _STANDARD_ALPHABET = {(byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F', 116 (byte)'G', (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N', (byte)'O', (byte)'P', (byte)'Q', 117 (byte)'R', (byte)'S', (byte)'T', (byte)'U', (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z', (byte)'a', (byte)'b', 118 (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g', (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m', 119 (byte)'n', (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u', (byte)'v', (byte)'w', (byte)'x', 120 (byte)'y', (byte)'z', (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7', (byte)'8', 121 (byte)'9', (byte)'+', (byte)'/'}; 122 123 /** 124 * Translates a Base64 value to either its 6-bit reconstruction value or a negative number indicating some other meaning. 125 **/ 126 private final static byte[] _STANDARD_DECODABET = {-9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 0 - 8 127 -5, -5, // Whitespace: Tab and Linefeed 128 -9, -9, // Decimal 11 - 12 129 -5, // Whitespace: Carriage Return 130 -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 14 - 26 131 -9, -9, -9, -9, -9, // Decimal 27 - 31 132 -5, // Whitespace: Space 133 -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 33 - 42 134 62, // Plus sign at decimal 43 135 -9, -9, -9, // Decimal 44 - 46 136 63, // Slash at decimal 47 137 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // Numbers zero through nine 138 -9, -9, -9, // Decimal 58 - 60 139 -1, // Equals sign at decimal 61 140 -9, -9, -9, // Decimal 62 - 64 141 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, // Letters 'A' through 'N' 142 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // Letters 'O' through 'Z' 143 -9, -9, -9, -9, -9, -9, // Decimal 91 - 96 144 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, // Letters 'a' through 'm' 145 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // Letters 'n' through 'z' 146 -9, -9, -9, -9 // Decimal 123 - 126 147 /*,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 127 - 139 148 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 140 - 152 149 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 153 - 165 150 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 166 - 178 151 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 179 - 191 152 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 192 - 204 153 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 205 - 217 154 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 218 - 230 155 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 231 - 243 156 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9 // Decimal 244 - 255 */ 157 }; 158 159 /** Defeats instantiation. */ 160 private Base64() { 161 } 162 163 /* ******** E N C O D I N G M E T H O D S ******** */ 164 165 /** 166 * <p> 167 * Encodes up to three bytes of the array <var>source</var> and writes the resulting four Base64 bytes to 168 * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying 169 * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to 170 * accomodate <var>srcOffset</var> + 3 for the <var>source</var> array or <var>destOffset</var> + 4 for the 171 * <var>destination</var> array. The actual number of significant bytes in your array is given by <var>numSigBytes</var>. 172 * </p> 173 * <p> 174 * This is the lowest level of the encoding methods with all possible parameters. 175 * </p> 176 * 177 * @param source the array to convert 178 * @param srcOffset the index where conversion begins 179 * @param numSigBytes the number of significant bytes in your array 180 * @param destination the array to hold the conversion 181 * @param destOffset the index where output will be put 182 * @return the <var>destination</var> array 183 * @since 1.3 184 */ 185 private static byte[] encode3to4( byte[] source, 186 int srcOffset, 187 int numSigBytes, 188 byte[] destination, 189 int destOffset ) { 190 byte[] ALPHABET = _STANDARD_ALPHABET; 191 192 // 1 2 3 193 // 01234567890123456789012345678901 Bit position 194 // --------000000001111111122222222 Array position from threeBytes 195 // --------| || || || | Six bit groups to index ALPHABET 196 // >>18 >>12 >> 6 >> 0 Right shift necessary 197 // 0x3f 0x3f 0x3f Additional AND 198 199 // Create buffer with zero-padding if there are only one or two 200 // significant bytes passed in the array. 201 // We have to shift left 24 in order to flush out the 1's that appear 202 // when Java treats a value as negative that is cast from a byte to an int. 203 int inBuff = (numSigBytes > 0 ? ((source[srcOffset] << 24) >>> 8) : 0) 204 | (numSigBytes > 1 ? ((source[srcOffset + 1] << 24) >>> 16) : 0) 205 | (numSigBytes > 2 ? ((source[srcOffset + 2] << 24) >>> 24) : 0); 206 207 switch (numSigBytes) { 208 case 3: 209 destination[destOffset] = ALPHABET[(inBuff >>> 18)]; 210 destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f]; 211 destination[destOffset + 2] = ALPHABET[(inBuff >>> 6) & 0x3f]; 212 destination[destOffset + 3] = ALPHABET[(inBuff) & 0x3f]; 213 return destination; 214 215 case 2: 216 destination[destOffset] = ALPHABET[(inBuff >>> 18)]; 217 destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f]; 218 destination[destOffset + 2] = ALPHABET[(inBuff >>> 6) & 0x3f]; 219 destination[destOffset + 3] = EQUALS_SIGN; 220 return destination; 221 222 case 1: 223 destination[destOffset] = ALPHABET[(inBuff >>> 18)]; 224 destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f]; 225 destination[destOffset + 2] = EQUALS_SIGN; 226 destination[destOffset + 3] = EQUALS_SIGN; 227 return destination; 228 229 default: 230 return destination; 231 } // end switch 232 } // end encode3to4 233 234 /** 235 * Encodes a byte array into Base64 notation. Does not GZip-compress data. 236 * 237 * @param source The data to convert 238 * @return the encoded data 239 * @since 1.4 240 */ 241 public static String encodeBytes( byte[] source ) { 242 // Convert option to boolean in way that code likes it. 243 boolean breakLines = false; 244 int len = source.length; 245 int len43 = len * 4 / 3; 246 byte[] outBuff = new byte[(len43) // Main 4:3 247 + ((len % 3) > 0 ? 4 : 0) // Account for padding 248 + (breakLines ? (len43 / MAX_LINE_LENGTH) : 0)]; // New lines 249 int d = 0; 250 int e = 0; 251 int len2 = len - 2; 252 int lineLength = 0; 253 for (; d < len2; d += 3, e += 4) { 254 encode3to4(source, d, 3, outBuff, e); 255 256 lineLength += 4; 257 if (breakLines && lineLength == MAX_LINE_LENGTH) { 258 outBuff[e + 4] = NEW_LINE; 259 e++; 260 lineLength = 0; 261 } // end if: end of line 262 } // en dfor: each piece of array 263 264 if (d < len) { 265 encode3to4(source, d, len - d, outBuff, e); 266 e += 4; 267 } // end if: some padding needed 268 269 // Return value according to relevant encoding. 270 try { 271 return new String(outBuff, 0, e, PREFERRED_ENCODING); 272 } // end try 273 catch (java.io.UnsupportedEncodingException uue) { 274 return new String(outBuff, 0, e); 275 } // end catch 276 277 } // end else: don't compress 278 279 /* ******** D E C O D I N G M E T H O D S ******** */ 280 281 /** 282 * Decodes four bytes from array <var>source</var> and writes the resulting bytes (up to three of them) to 283 * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying 284 * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to 285 * accomodate <var>srcOffset</var> + 4 for the <var>source</var> array or <var>destOffset</var> + 3 for the 286 * <var>destination</var> array. This method returns the actual number of bytes that were converted from the Base64 encoding. 287 * <p> 288 * This is the lowest level of the decoding methods with all possible parameters. 289 * </p> 290 * 291 * @param source the array to convert 292 * @param srcOffset the index where conversion begins 293 * @param destination the array to hold the conversion 294 * @param destOffset destination offset 295 * @return the number of decoded bytes converted 296 * @since 1.3 297 */ 298 private static int decode4to3( byte[] source, 299 int srcOffset, 300 byte[] destination, 301 int destOffset ) { 302 byte[] DECODABET = _STANDARD_DECODABET; 303 304 // Example: Dk== 305 if (source[srcOffset + 2] == EQUALS_SIGN) { 306 // Two ways to do the same thing. Don't know which way I like best. 307 // int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) 308 // | ( ( DECODABET[ source[ srcOffset + 1] ] << 24 ) >>> 12 ); 309 int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12); 310 311 destination[destOffset] = (byte)(outBuff >>> 16); 312 return 1; 313 } 314 315 // Example: DkL= 316 else if (source[srcOffset + 3] == EQUALS_SIGN) { 317 // Two ways to do the same thing. Don't know which way I like best. 318 // int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) 319 // | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 ) 320 // | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 ); 321 int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12) 322 | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6); 323 324 destination[destOffset] = (byte)(outBuff >>> 16); 325 destination[destOffset + 1] = (byte)(outBuff >>> 8); 326 return 2; 327 } 328 329 // Example: DkLE 330 else { 331 // Two ways to do the same thing. Don't know which way I like best. 332 // int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) 333 // | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 ) 334 // | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 ) 335 // | ( ( DECODABET[ source[ srcOffset + 3 ] ] << 24 ) >>> 24 ); 336 int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12) 337 | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6) | ((DECODABET[source[srcOffset + 3]] & 0xFF)); 338 339 destination[destOffset] = (byte)(outBuff >> 16); 340 destination[destOffset + 1] = (byte)(outBuff >> 8); 341 destination[destOffset + 2] = (byte)(outBuff); 342 343 return 3; 344 } 345 } // end decodeToBytes 346 347 /** 348 * Decodes data from Base64 notation. 349 * 350 * @param s the string to decode 351 * @return the decoded data 352 * @since 1.4 353 */ 354 public static byte[] decode( String s ) { 355 byte[] source; 356 try { 357 source = s.getBytes(PREFERRED_ENCODING); 358 } // end try 359 catch (java.io.UnsupportedEncodingException uee) { 360 source = s.getBytes(); 361 } // end catch 362 // </change> 363 if (source.length % 4 != 0) { 364 throw new IllegalArgumentException("Source bytes are not valid"); //$NON-NLS-1$ 365 } 366 byte[] DECODABET = _STANDARD_DECODABET; 367 int len = source.length; 368 byte[] outBuff = new byte[len * 3 / 4]; // Upper limit on size of output 369 int outBuffPosn = 0; 370 371 byte[] b4 = new byte[4]; 372 int b4Posn = 0; 373 int i = 0; 374 byte sbiCrop = 0; 375 byte sbiDecode = 0; 376 for (i = 0; i < len; i++) { 377 sbiCrop = (byte)(source[i] & 0x7f); // Only the low seven bits 378 sbiDecode = DECODABET[sbiCrop]; 379 380 if (sbiDecode >= WHITE_SPACE_ENC) // White space, Equals sign or better 381 { 382 if (sbiDecode >= EQUALS_SIGN_ENC) { 383 b4[b4Posn++] = sbiCrop; 384 if (b4Posn > 3) { 385 outBuffPosn += decode4to3(b4, 0, outBuff, outBuffPosn); 386 b4Posn = 0; 387 388 // If that was the equals sign, break out of 'for' loop 389 if (sbiCrop == EQUALS_SIGN) break; 390 } // end if: quartet built 391 392 } // end if: equals sign or better 393 394 } // end if: white space, equals sign or better 395 else { 396 throw new IllegalArgumentException("Source bytes are not valid"); //$NON-NLS-1$ 397 } // end else: 398 } // each input character 399 400 byte[] out = new byte[outBuffPosn]; 401 System.arraycopy(outBuff, 0, out, 0, outBuffPosn); 402 return out; 403 } // end decode 404 }