网上有很多源代码可以实现Base64编码的转换,但是主要是对中文转换的时候有问题。
后来在网上查了很多资料,发现问题出在字符编码上:Java 的 String 在内部使用 UTF-16 表示字符,而 Base64 算法处理的是字节数组,网上的实现大多只考虑了英文(单字节)字符。如果字符串与字节数组互相转换时没有显式指定编码,中文字符就会出现截断或乱码。后来在源代码的基础上进行了小的调整(显式使用 UTF-8),最后可以正确实现中文的 Base64 编码和解码。
下面贴出一个实现代码:
1
package com.aostarit.idm;
2
3
import java.io.UnsupportedEncodingException;
4
import java.util.Arrays;
5
6
/** *//**
7
* A very fast and memory efficient class to encode and decode to and from
8
* BASE64 in full accordance with RFC 2045.<br>
9
* <br>
10
* On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is
11
* about 10 times faster on small arrays (10 - 1000 bytes) and 2-3 times as fast
12
* on larger arrays (10000 - 1000000 bytes) compared to
13
* <code>sun.misc.Encoder()/Decoder()</code>.<br>
14
* <br>
15
*
16
* On byte arrays the encoder is about 20% faster than Jakarta Commons Base64
17
* Codec for encode and about 50% faster for decoding large arrays. This
18
* implementation is about twice as fast on very small arrays (< 30 bytes). If
19
* source/destination is a <code>String</code> this version is about three
20
* times as fast due to the fact that the Commons Codec result has to be recoded
21
* to a <code>String</code> from <code>byte[]</code>, which is very
22
* expensive.<br>
23
* <br>
24
*
25
* This encode/decode algorithm doesn't create any temporary arrays as many
26
* other codecs do, it only allocates the resulting array. This produces less
27
* garbage and it is possible to handle arrays twice as large as algorithms that
28
* create a temporary array. (E.g. Jakarta Commons Codec). It is unknown whether
29
* Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays
30
* but since performance is quite low it probably does.<br>
31
* <br>
32
*
33
* The encoder produces the same output as the Sun one except that the Sun's
34
* encoder appends a trailing line separator if the last character isn't a pad.
35
* Unclear why but it only adds to the length and is probably a side effect.
36
* Both are in conformance with RFC 2045 though.<br>
37
* Commons Codec seems to always add a trailing line separator.<br>
38
* <br>
39
*
40
* <b>Note!</b> The encode/decode method pairs (types) come in three versions
41
* with the <b>exact</b> same algorithm and thus a lot of code redundancy. This
42
* is to not create any temporary arrays for transcoding to/from different
43
* format types. The methods not used can simply be commented out.<br>
44
* <br>
45
*
46
* There is also a "fast" version of all decode methods that works the same way
47
* as the normal ones, but has a few demands on the decoded input. Normally
48
* though, these fast versions should be used if the source of the input is known
49
* and it hasn't been tampered with.<br>
50
* <br>
51
*
52
* If you find the code useful or you find a bug, please send me a note at
53
* base64 @ miginfocom . com.
54
*
55
* Licence (BSD): ==============
56
*
57
* Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
58
* All rights reserved.
59
*
60
* Redistribution and use in source and binary forms, with or without
61
* modification, are permitted provided that the following conditions are met:
62
* Redistributions of source code must retain the above copyright notice, this
63
* list of conditions and the following disclaimer. Redistributions in binary
64
* form must reproduce the above copyright notice, this list of conditions and
65
* the following disclaimer in the documentation and/or other materials provided
66
* with the distribution. Neither the name of the MiG InfoCom AB nor the names
67
* of its contributors may be used to endorse or promote products derived from
68
* this software without specific prior written permission.
69
*
70
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
71
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
72
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
73
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
74
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
75
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
76
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
77
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
78
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
79
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
80
* POSSIBILITY OF SUCH DAMAGE.
81
*
82
* @version 2.2
83
* @author Mikael Grev Date: 2004-aug-02 Time: 11:31:11
84
*/
85
86
public class Base64
{
    /** The 64 output symbols of the BASE64 alphabet, indexed by their 6-bit value. */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
            .toCharArray();

    /**
     * Reverse lookup table: code unit (0-255) to 6-bit value. Illegal
     * characters map to -1; the pad character '=' maps to 0 so that it can be
     * OR-ed into the 24-bit group without any effect.
     */
    private static final int[] IA = new int[256];
    static
    {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++)
            IA[CA[i]] = i;
        IA['='] = 0;
    }

    /**
     * Bounds-safe lookup in {@link #IA} for a UTF-16 code unit. Characters
     * above 255 (e.g. CJK characters) can never be BASE64 symbols, so they are
     * reported as illegal instead of indexing outside the table.
     *
     * @param c
     *            The character to look up.
     * @return The 6-bit value of <code>c</code>, 0 for '=', or -1 if
     *         <code>c</code> is not a legal BASE64 character.
     */
    private static int lookup(char c)
    {
        return c < IA.length ? IA[c] : -1;
    }

    // ****************************************************************************************
    // * char[] version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>char[]</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty array will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public static final char[] encodeToChar(byte[] sArr, boolean lineSep)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new char[0];

        int eLen = (sLen / 3) * 3; // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
        char[] dArr = new char[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;)
        {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = CA[(i >>> 18) & 0x3f];
            dArr[d++] = CA[(i >>> 12) & 0x3f];
            dArr[d++] = CA[(i >>> 6) & 0x3f];
            dArr[d++] = CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars = 76 chars
            // per line, but never after the very last line.
            if (lineSep && ++cc == 19 && d < dLen - 2)
            {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0)
        {
            // Prepare the int (18 significant bits at most)
            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = CA[i >> 12];
            dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded char array. All illegal characters (including
     * characters above 255) will be ignored, so arrays both with and without
     * line separators can be handled.
     *
     * @param sArr
     *            The source array. <code>null</code> or length 0 will return
     *            an empty array.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public static final byte[] decode(char[] sArr)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new byte[0];

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (lookup(sArr[i]) < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at end, skipping trailing illegal characters.
        int pad = 0;
        for (int i = sLen; i > 1 && lookup(sArr[--i]) <= 0;)
            if (sArr[i] == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++)
            {
                // j only increased if a valid char was found.
                int c = lookup(sArr[s++]);
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len)
            {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded char array that is known to be reasonably well
     * formatted. The method is about twice as fast as {@link #decode(char[])}.
     * The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators at
     * all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The array must not contain illegal characters within the encoded
     * string<br>
     * + The array CAN have illegal characters at the beginning and end, those
     * will be trimmed before decoding.<br>
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public static final byte[] decodeFast(char[] sArr)
    {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && lookup(sArr[sIx]) < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && lookup(sArr[eIx]) < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone "=" from reading index -1.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators

        // The first separator, if any, sits 76 chars after the trimmed start
        // (not at absolute index 76, which is wrong when leading junk exists).
        int sepCnt = cCnt > 76 && sArr[sIx + 76] == '\r' ? (cCnt / 78) << 1 : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing decodable (e.g. only padding or junk).

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            int i = lookup(sArr[sIx++]) << 18 | lookup(sArr[sIx++]) << 12
                    | lookup(sArr[sIx++]) << 6 | lookup(sArr[sIx++]);

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19)
            {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len)
        {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= lookup(sArr[sIx++]) << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    // ****************************************************************************************
    // * byte[] version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>byte[]</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty array will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public static final byte[] encodeToByte(byte[] sArr, boolean lineSep)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0)
            return new byte[0];

        int eLen = (sLen / 3) * 3; // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;)
        {
            // Copy next three bytes into lower 24 bits of int, paying
            // attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator: 19 groups of 4 chars = 76 chars
            // per line, but never after the very last line.
            if (lineSep && ++cc == 19 && d < dLen - 2)
            {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0)
        {
            // Prepare the int (18 significant bits at most)
            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded byte array. All illegal characters will be
     * ignored, so arrays both with and without line separators can be handled.
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public static final byte[] decode(byte[] sArr)
    {
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (IA[sArr[i] & 0xff] < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at end, skipping trailing illegal characters.
        int pad = 0;
        for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0;)
            if (sArr[i] == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++)
            {
                // j only increased if a valid char was found.
                int c = IA[sArr[s++] & 0xff];
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len)
            {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }

        return dArr;
    }

    /**
     * Decodes a BASE64 encoded byte array that is known to be reasonably well
     * formatted. The method is about twice as fast as {@link #decode(byte[])}.
     * The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators at
     * all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The array must not contain illegal characters within the encoded
     * string<br>
     * + The array CAN have illegal characters at the beginning and end, those
     * will be trimmed before decoding.<br>
     *
     * @param sArr
     *            The source array. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public static final byte[] decodeFast(byte[] sArr)
    {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone "=" from reading index -1.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators

        // The first separator, if any, sits 76 chars after the trimmed start
        // (not at absolute index 76, which is wrong when leading junk exists).
        int sepCnt = cCnt > 76 && sArr[sIx + 76] == '\r' ? (cCnt / 78) << 1 : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing decodable (e.g. only padding or junk).

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            // Bytes are signed in Java, so mask before indexing the table.
            int i = IA[sArr[sIx++] & 0xff] << 18 | IA[sArr[sIx++] & 0xff] << 12
                    | IA[sArr[sIx++] & 0xff] << 6 | IA[sArr[sIx++] & 0xff];

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19)
            {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len)
        {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= IA[sArr[sIx++] & 0xff] << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    // ****************************************************************************************
    // * String version
    // ****************************************************************************************

    /**
     * Encodes a raw byte array into a BASE64 <code>String</code>
     * representation in accordance with RFC 2045.
     *
     * @param sArr
     *            The bytes to convert. If <code>null</code> or length 0 an
     *            empty string will be returned.
     * @param lineSep
     *            Optional "\r\n" after 76 characters, unless end of file.<br>
     *            No line separator will be in breach of RFC 2045 which
     *            specifies max 76 per line but will be a little faster.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public static final String encodeToString(byte[] sArr, boolean lineSep)
    {
        // Reuse char[] since we can't create a String incrementally anyway and
        // StringBuffer/Builder would be slower.
        return new String(encodeToChar(sArr, lineSep));
    }

    /**
     * Encodes the UTF-8 bytes of a string into a single-line BASE64 string.
     * Converting through UTF-8 explicitly (instead of the platform default
     * charset) is what makes non-ASCII text such as Chinese round-trip
     * correctly with {@link #decode(String)}.
     *
     * @param s
     *            The text to encode. <code>null</code> will throw an
     *            exception.
     * @return The BASE64 representation of the UTF-8 bytes of <code>s</code>,
     *         without line separators. Never <code>null</code>.
     */
    public static final String encode(String s)
    {
        try
        {
            return new String(encodeToChar(s.getBytes("UTF-8"), false));
        }
        catch (UnsupportedEncodingException e)
        {
            // Every Java platform is required to support UTF-8, so this is
            // unreachable in practice; fail loudly rather than return null.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }

    /**
     * Decodes a BASE64 encoded <code>String</code>. All illegal characters
     * (including characters above 255) will be ignored, so strings both with
     * and without line separators can be handled.<br>
     * <b>Note!</b> It can be up to about 2x the speed to call
     * <code>decode(str.toCharArray())</code> instead. That will create a
     * temporary array though. This version will use <code>str.charAt(i)</code>
     * to iterate the string.
     *
     * @param str
     *            The source string. <code>null</code> or length 0 will return
     *            an empty array.
     * @param used
     *            Ignored. It only distinguishes this overload from
     *            {@link #decode(String)}, which returns a <code>String</code>.
     * @return The decoded array of bytes. May be of length 0. Will be
     *         <code>null</code> if the number of legal characters (including
     *         '=') isn't divisible by 4. (I.e. definitely corrupted).
     */
    public static final byte[] decode(String str, boolean used)
    {
        // Check special case
        int sLen = str != null ? str.length() : 0;
        if (sLen == 0)
            return new byte[0];

        // Count illegal characters (including '\r', '\n') to know what size
        // the returned array will be, so we don't have to reallocate & copy
        // it later.
        int sepCnt = 0;
        for (int i = 0; i < sLen; i++)
            if (lookup(str.charAt(i)) < 0)
                sepCnt++;

        // Check so that legal chars (including '=') are evenly divisible by 4
        // as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0)
            return null;

        // Count '=' at end, skipping trailing illegal characters.
        int pad = 0;
        for (int i = sLen; i > 1 && lookup(str.charAt(--i)) <= 0;)
            if (str.charAt(i) == '=')
                pad++;

        int len = ((sLen - sepCnt) * 6 >> 3) - pad;

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++)
            {
                // j only increased if a valid char was found.
                int c = lookup(str.charAt(s++));
                if (c >= 0)
                    i |= c << (18 - j * 6);
                else
                    j--;
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len)
            {
                dArr[d++] = (byte) (i >> 8);
                if (d < len)
                    dArr[d++] = (byte) i;
            }
        }
        return dArr;
    }

    /**
     * Decodes a BASE64 encoded string that is known to be reasonably well
     * formatted. The method is about twice as fast as
     * {@link #decode(String, boolean)}. The preconditions are:<br>
     * + The string must have a line length of 76 chars OR no line separators
     * at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The string must not contain illegal characters within the encoded
     * content<br>
     * + The string CAN have illegal characters at the beginning and end, those
     * will be trimmed before decoding.<br>
     *
     * @param s
     *            The source string. Length 0 will return an empty array.
     *            <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0.
     */
    public static final byte[] decodeFast(String s)
    {
        // Check special case
        int sLen = s.length();
        if (sLen == 0)
            return new byte[0];

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start. A bounds-checked lookup is used
        // instead of "& 0xff", which would alias e.g. U+0141 onto 'A'.
        while (sIx < eIx && lookup(s.charAt(sIx)) < 0)
            sIx++;

        // Trim illegal chars from end
        while (eIx > 0 && lookup(s.charAt(eIx)) < 0)
            eIx--;

        // Get the padding count (=) (0, 1 or 2). The eIx > 0 guard keeps a
        // lone "=" from reading index -1.
        int pad = s.charAt(eIx) == '=' ? (eIx > 0 && s.charAt(eIx - 1) == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators

        // The first separator, if any, sits 76 chars after the trimmed start
        // (not at absolute index 76, which is wrong when leading junk exists).
        int sepCnt = cCnt > 76 && s.charAt(sIx + 76) == '\r' ? (cCnt / 78) << 1 : 0;

        int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
        if (len <= 0)
            return new byte[0]; // Nothing decodable (e.g. only padding or junk).

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;)
        {
            // Assemble three bytes into an int from four "valid" characters.
            int i = lookup(s.charAt(sIx++)) << 18 | lookup(s.charAt(sIx++)) << 12
                    | lookup(s.charAt(sIx++)) << 6 | lookup(s.charAt(sIx++));

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19)
            {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len)
        {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++)
                i |= lookup(s.charAt(sIx++)) << (18 - j * 6);

            for (int r = 16; d < len; r -= 8)
                dArr[d++] = (byte) (i >> r);
        }

        return dArr;
    }

    /**
     * Decodes a BASE64 string produced by {@link #encode(String)} back into
     * text, interpreting the decoded bytes as UTF-8. Uses
     * {@link #decodeFast(String)}, so the input must be well formed (see the
     * preconditions there).
     *
     * @param s
     *            The BASE64 text. <code>null</code> will throw an exception.
     * @return The decoded text. Never <code>null</code>.
     * @throws UnsupportedEncodingException
     *             Declared because of the charset-name lookup; UTF-8 is
     *             supported on every Java platform.
     */
    public static String decode(String s) throws UnsupportedEncodingException
    {
        return new String(Base64.decodeFast(s), "UTF-8");
    }

    /**
     * Small demo: encodes a Chinese string and decodes it again, printing
     * both results.
     *
     * @param args
     *            Not used.
     * @throws UnsupportedEncodingException
     *             Propagated from {@link #decode(String)}.
     */
    public static void main(String[] args) throws UnsupportedEncodingException
    {
        // "测试账户" written with Unicode escapes so that compilation does not
        // depend on the source file encoding.
        String s = "\u6d4b\u8bd5\u8d26\u6237";
        String encodeS = Base64.encode(s);
        System.out.println(encodeS);
        System.out.println(Base64.decode(encodeS));
    }
}
704
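关键的改动在上面代码的第538行和第694行:encode(String) 中用 s.getBytes("UTF-8") 把字符串(内部为 UTF-16)显式转换成 UTF-8 字节再做 Base64 编码,decode(String) 中用 new String(..., "UTF-8") 把解码得到的 UTF-8 字节还原成字符串。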
posted on 2007-09-15 18:40
思考 阅读(7718)
评论(5) 编辑 收藏 所属分类:
Linux