001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors. 
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.common.util;
023    
024    /**
025     * <p>
026     * Encodes and decodes to and from Base64 notation.
027     * </p>
028     * <p>
029     * Homepage: <a href="http://iharder.net/base64">http://iharder.net/base64</a>.
030     * </p>
031     * <p>
032     * The <tt>options</tt> parameter, which appears in a few places, is used to pass several pieces of information to the encoder. In
033     * the "higher level" methods such as encodeBytes( bytes, options ) the options parameter can be used to indicate such things as
034     * first gzipping the bytes before encoding them, not inserting linefeeds (though that breaks strict Base64 compatibility), and
035     * encoding using the URL-safe and Ordered dialects.
036     * </p>
037     * <p>
038     * The constants defined in Base64 can be OR-ed together to combine options, so you might make a call like this:
039     * </p>
040     * <code>String encoded = Base64.encodeBytes( mybytes, Base64.GZIP | Base64.DONT_BREAK_LINES );</code>
041     * <p>
042     * to compress the data before encoding it and then making the output have no newline characters.
043     * </p>
044     * <p>
045     * Change Log:
046     * </p>
047     * <ul>
048     * <li>v2.2.2 - Fixed encodeFileToFile and decodeFileToFile to use the Base64.InputStream class to encode and decode on the fly
049     * which uses less memory than encoding/decoding an entire file into memory before writing.</li>
050     * <li>v2.2.1 - Fixed bug using URL_SAFE and ORDERED encodings. Fixed bug when using very small files (~< 40 bytes).</li>
051     * <li>v2.2 - Added some helper methods for encoding/decoding directly from one file to the next. Also added a main() method to
052     * support command line encoding/decoding from one file to the next. Also added these Base64 dialects:
053     * <ol>
054     * <li>The default is RFC3548 format.</li>
055     * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.URLSAFE_FORMAT) generates URL and file name friendly format as described in
056     * Section 4 of RFC3548. http://www.faqs.org/rfcs/rfc3548.html</li>
057     * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.ORDERED_FORMAT) generates URL and file name friendly format that preserves
058     * lexical ordering as described in http://www.faqs.org/qa/rfcc-1940.html</li>
059     * </ol>
060     * Special thanks to Jim Kellerman at <a href="http://www.powerset.com/">http://www.powerset.com/</a> for contributing the new
061     * Base64 dialects.</li>
062     * <li>v2.1 - Cleaned up javadoc comments and unused variables and methods. Added some convenience methods for reading and writing
063     * to and from files.</li>
064     * <li>v2.0.2 - Now specifies UTF-8 encoding in places where the code fails on systems with other encodings (like EBCDIC).</li>
065     * <li>v2.0.1 - Fixed an error when decoding a single byte, that is, when the encoded data was a single byte.</li>
066     * <li>v2.0 - I got rid of methods that used booleans to set options. Now everything is more consolidated and cleaner. The code
067     * now detects when data that's being decoded is gzip-compressed and will decompress it automatically. Generally things are
068     * cleaner. You'll probably have to change some method calls that you were making to support the new options format (<tt>int</tt>s
069     * that you "OR" together).</li>
070     * <li>v1.5.1 - Fixed bug when decompressing and decoding to a byte[] using <tt>decode( String s, boolean gzipCompressed )</tt>.
071     * Added the ability to "suspend" encoding in the Output Stream so you can turn on and off the encoding if you need to embed
072     * base64 data in an otherwise "normal" stream (like an XML file).</li>
073     * <li>v1.5 - Output stream passes on flush() command but doesn't do anything itself. This helps when using GZIP streams. Added the
074     * ability to GZip-compress objects before encoding them.</li>
075     * <li>v1.4 - Added helper methods to read/write files.</li>
076     * <li>v1.3.6 - Fixed OutputStream.flush() so that 'position' is reset.</li>
077     * <li>v1.3.5 - Added flag to turn on and off line breaks. Fixed bug in input stream where last buffer being read, if not
078     * completely full, was not returned.</li>
079     * <li>v1.3.4 - Fixed when "improperly padded stream" error was thrown at the wrong time.</li>
080     * <li>v1.3.3 - Fixed I/O streams which were totally messed up.</li>
081     * </ul>
082     * <p>
083     * I am placing this code in the Public Domain. Do with it as you will. This software comes with no guarantees or warranties but
084     * with plenty of well-wishing instead! Please visit <a href="http://iharder.net/base64">http://iharder.net/base64</a>
085     * periodically to check for updates or to contribute improvements.
086     * </p>
087     * 
088     * @author Robert Harder
089     * @author rob@iharder.net
090     * @version 2.2.2
091     */
public class Base64 {

    /* ********  P R I V A T E   F I E L D S  ******** */

    /** The equals sign (=) as a byte; it pads the last quartet of encoded output. */
    private static final byte EQUALS_SIGN = (byte)'=';

    /** Preferred encoding. */
    private static final String PREFERRED_ENCODING = "UTF-8";

    /** Message of the exception thrown whenever {@link #decode(String)} rejects its input. */
    private static final String INVALID_SOURCE = "Source bytes are not valid"; //$NON-NLS-1$

    private static final byte WHITE_SPACE_ENC = -5; // Decodabet marker: white space
    private static final byte EQUALS_SIGN_ENC = -1; // Decodabet marker: equals sign

    /* ********  S T A N D A R D   B A S E 6 4   A L P H A B E T  ******** */

    /** The 64 valid Base64 values. */
    /* Host platform may be something funny like EBCDIC, so we hardcode these values. */
    private static final byte[] _STANDARD_ALPHABET = {(byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F',
        (byte)'G', (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N', (byte)'O', (byte)'P', (byte)'Q',
        (byte)'R', (byte)'S', (byte)'T', (byte)'U', (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z', (byte)'a', (byte)'b',
        (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g', (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m',
        (byte)'n', (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u', (byte)'v', (byte)'w', (byte)'x',
        (byte)'y', (byte)'z', (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7', (byte)'8',
        (byte)'9', (byte)'+', (byte)'/'};

    /**
     * Translates a Base64 value to either its 6-bit reconstruction value or a negative number indicating some other meaning:
     * {@link #WHITE_SPACE_ENC} for white space, {@link #EQUALS_SIGN_ENC} for the padding character and -9 for anything that may
     * not appear in Base64 text. The table has one entry for each of the 128 seven-bit values, so any non-negative
     * <code>byte</code> is a valid index.
     **/
    private static final byte[] _STANDARD_DECODABET = {-9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 0 - 8
        -5, -5, // Whitespace: Tab and Linefeed
        -9, -9, // Decimal 11 - 12
        -5, // Whitespace: Carriage Return
        -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 14 - 26
        -9, -9, -9, -9, -9, // Decimal 27 - 31
        -5, // Whitespace: Space
        -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 33 - 42
        62, // Plus sign at decimal 43
        -9, -9, -9, // Decimal 44 - 46
        63, // Slash at decimal 47
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // Numbers zero through nine
        -9, -9, -9, // Decimal 58 - 60
        -1, // Equals sign at decimal 61
        -9, -9, -9, // Decimal 62 - 64
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, // Letters 'A' through 'N'
        14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // Letters 'O' through 'Z'
        -9, -9, -9, -9, -9, -9, // Decimal 91 - 96
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, // Letters 'a' through 'm'
        39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // Letters 'n' through 'z'
        -9, -9, -9, -9, -9 // Decimal 123 - 127
    };

    /** Defeats instantiation. */
    private Base64() {
    }

    /* ********  E N C O D I N G   M E T H O D S  ******** */

    /**
     * <p>
     * Encodes up to three bytes of the array <var>source</var> and writes the resulting four Base64 bytes to
     * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying
     * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to
     * accommodate <var>srcOffset</var> + 3 for the <var>source</var> array or <var>destOffset</var> + 4 for the
     * <var>destination</var> array. The actual number of significant bytes in your array is given by <var>numSigBytes</var>.
     * </p>
     * <p>
     * When fewer than three bytes are significant the missing bits are treated as zero and the unused output positions are
     * filled with the equals sign. A <var>numSigBytes</var> outside the range 1 to 3 writes nothing.
     * </p>
     * 
     * @param source the array to convert
     * @param srcOffset the index where conversion begins
     * @param numSigBytes the number of significant bytes in your array
     * @param destination the array to hold the conversion
     * @param destOffset the index where output will be put
     * @return the <var>destination</var> array
     * @since 1.3
     */
    private static byte[] encode3to4( byte[] source,
                                      int srcOffset,
                                      int numSigBytes,
                                      byte[] destination,
                                      int destOffset ) {
        // Layout of the 24 significant bits of inBuff (most significant bit on the left):
        //
        //   bits 23..16   bits 15..8    bits 7..0
        //   source[0]     source[1]     source[2]
        //
        // Read as four 6-bit groups, the bits index the alphabet:
        //   >>> 18, (>>> 12) & 0x3f, (>>> 6) & 0x3f, & 0x3f
        //
        // Masking each byte with 0xFF removes the sign extension that Java applies when a negative byte is widened to an int.
        int inBuff = (numSigBytes > 0 ? (source[srcOffset] & 0xFF) << 16 : 0)
                     | (numSigBytes > 1 ? (source[srcOffset + 1] & 0xFF) << 8 : 0)
                     | (numSigBytes > 2 ? (source[srcOffset + 2] & 0xFF) : 0);

        switch (numSigBytes) {
            case 3:
                destination[destOffset] = _STANDARD_ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = _STANDARD_ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = _STANDARD_ALPHABET[(inBuff >>> 6) & 0x3f];
                destination[destOffset + 3] = _STANDARD_ALPHABET[(inBuff) & 0x3f];
                return destination;

            case 2:
                destination[destOffset] = _STANDARD_ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = _STANDARD_ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = _STANDARD_ALPHABET[(inBuff >>> 6) & 0x3f];
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            case 1:
                destination[destOffset] = _STANDARD_ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = _STANDARD_ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = EQUALS_SIGN;
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            default:
                return destination;
        } // end switch
    } // end encode3to4

    /**
     * Encodes a byte array into Base64 notation. Does not GZip-compress data and does not insert line breaks; the result is
     * padded with equals signs to a multiple of four characters.
     * 
     * @param source The data to convert
     * @return the encoded data
     * @throws NullPointerException if <var>source</var> is <code>null</code>
     * @throws IllegalArgumentException if the encoded form of <var>source</var> would not fit into a single array
     * @since 1.4
     */
    public static String encodeBytes( byte[] source ) {
        int len = source.length;

        // Every group of up to three input bytes yields exactly four output bytes. The size is computed with long arithmetic
        // so that very large inputs are reported instead of overflowing int.
        long encodedLength = ((long)len + 2) / 3 * 4;
        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Source is too large to encode: " + len + " bytes"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        byte[] outBuff = new byte[(int)encodedLength];

        // Encode all complete three-byte groups ...
        int fullGroupsEnd = len - (len % 3);
        int d = 0;
        int e = 0;
        for (; d < fullGroupsEnd; d += 3, e += 4) {
            encode3to4(source, d, 3, outBuff, e);
        } // end for: each complete group

        // ... and then the one or two bytes that may be left over, which produces the padding.
        if (d < len) {
            encode3to4(source, d, len - d, outBuff, e);
        } // end if: some padding needed

        // Return value according to relevant encoding.
        try {
            return new String(outBuff, PREFERRED_ENCODING);
        } // end try
        catch (java.io.UnsupportedEncodingException uue) {
            return new String(outBuff);
        } // end catch
    } // end encodeBytes

    /* ********  D E C O D I N G   M E T H O D S  ******** */

    /**
     * Decodes four bytes from array <var>source</var> and writes the resulting bytes (up to three of them) to
     * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying
     * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to
     * accommodate <var>srcOffset</var> + 4 for the <var>source</var> array or <var>destOffset</var> + 3 for the
     * <var>destination</var> array. This method returns the actual number of bytes that were converted from the Base64 encoding.
     * <p>
     * The four source bytes must each be a character of the Base64 alphabet or the equals sign; the caller is responsible for
     * that. The equals sign itself is only accepted as padding, that is in the last position or in the last two positions.
     * </p>
     * 
     * @param source the array to convert
     * @param srcOffset the index where conversion begins
     * @param destination the array to hold the conversion
     * @param destOffset destination offset
     * @return the number of decoded bytes converted
     * @throws IllegalArgumentException if an equals sign appears anywhere but at the end of the quartet
     * @since 1.3
     */
    private static int decode4to3( byte[] source,
                                   int srcOffset,
                                   byte[] destination,
                                   int destOffset ) {
        byte c0 = source[srcOffset];
        byte c1 = source[srcOffset + 1];
        byte c2 = source[srcOffset + 2];
        byte c3 = source[srcOffset + 3];

        // The decodabet maps '=' to a negative marker, so letting a misplaced '=' through would corrupt the bit buffer.
        if (c0 == EQUALS_SIGN || c1 == EQUALS_SIGN || (c2 == EQUALS_SIGN && c3 != EQUALS_SIGN)) {
            throw new IllegalArgumentException(INVALID_SOURCE);
        }

        // The first two characters are always data and carry the 12 most significant bits.
        int outBuff = (_STANDARD_DECODABET[c0] << 18) | (_STANDARD_DECODABET[c1] << 12);

        // Example: Dk==
        if (c2 == EQUALS_SIGN) {
            destination[destOffset] = (byte)(outBuff >>> 16);
            return 1;
        }

        outBuff |= _STANDARD_DECODABET[c2] << 6;

        // Example: DkL=
        if (c3 == EQUALS_SIGN) {
            destination[destOffset] = (byte)(outBuff >>> 16);
            destination[destOffset + 1] = (byte)(outBuff >>> 8);
            return 2;
        }

        // Example: DkLE
        outBuff |= _STANDARD_DECODABET[c3];
        destination[destOffset] = (byte)(outBuff >>> 16);
        destination[destOffset + 1] = (byte)(outBuff >>> 8);
        destination[destOffset + 2] = (byte)(outBuff);
        return 3;
    } // end decode4to3

    /**
     * Decodes data from Base64 notation.
     * <p>
     * Tab, line feed, carriage return and space are skipped. Decoding stops after the first quartet that ends with the equals
     * sign; anything that follows it is ignored.
     * </p>
     * 
     * @param s the string to decode
     * @return the decoded data
     * @throws NullPointerException if <var>s</var> is <code>null</code>
     * @throws IllegalArgumentException if the number of bytes of <var>s</var> (white space included) is not a multiple of
     *         four, if <var>s</var> contains a character that is neither white space, the equals sign nor part of the Base64
     *         alphabet, if an equals sign appears anywhere but at the end of a quartet, or if the data ends in the middle of a
     *         quartet
     * @since 1.4
     */
    public static byte[] decode( String s ) {
        byte[] source;
        try {
            source = s.getBytes(PREFERRED_ENCODING);
        } // end try
        catch (java.io.UnsupportedEncodingException uee) {
            source = s.getBytes();
        } // end catch

        int len = source.length;
        if (len % 4 != 0) {
            throw new IllegalArgumentException(INVALID_SOURCE);
        }

        byte[] outBuff = new byte[len / 4 * 3]; // Upper limit on size of output
        int outBuffPosn = 0;

        byte[] b4 = new byte[4];
        int b4Posn = 0;
        for (int i = 0; i < len; i++) {
            byte sbi = source[i];

            // A negative byte has its high bit set, i.e. it belongs to a non-ASCII character. Such bytes are rejected rather
            // than masked down to seven bits, because masking would make them look like ordinary alphabet characters.
            if (sbi < 0) {
                throw new IllegalArgumentException(INVALID_SOURCE);
            }

            byte sbiDecode = _STANDARD_DECODABET[sbi];
            if (sbiDecode < WHITE_SPACE_ENC) {
                throw new IllegalArgumentException(INVALID_SOURCE);
            }
            if (sbiDecode < EQUALS_SIGN_ENC) {
                continue; // white space carries no data
            }

            // Equals sign or alphabet character: collect it and decode as soon as a quartet is complete.
            b4[b4Posn++] = sbi;
            if (b4Posn > 3) {
                outBuffPosn += decode4to3(b4, 0, outBuff, outBuffPosn);
                b4Posn = 0;

                // If that was the equals sign, break out of 'for' loop
                if (sbi == EQUALS_SIGN) break;
            } // end if: quartet built
        } // each input character

        // Characters left over here would otherwise be dropped silently.
        if (b4Posn != 0) {
            throw new IllegalArgumentException(INVALID_SOURCE);
        }

        byte[] out = new byte[outBuffPosn];
        System.arraycopy(outBuff, 0, out, 0, outBuffPosn);
        return out;
    } // end decode
}