|
1 /* |
|
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 /* FROM mail.jar */ |
|
27 package com.sun.xml.internal.org.jvnet.mimepull; |
|
28 |
|
29 import java.io.*; |
|
30 |
|
31 /** |
|
32 * This class implements a BASE64 Decoder. It is implemented as |
|
33 * a FilterInputStream, so one can just wrap this class around |
|
34 * any input stream and read bytes from this filter. The decoding |
|
35 * is done as the bytes are read out. |
|
36 * |
|
37 * @author John Mani |
|
38 * @author Bill Shannon |
|
39 */ |
|
40 |
|
41 final class BASE64DecoderStream extends FilterInputStream { |
|
42 // buffer of decoded bytes for single byte reads |
|
43 private byte[] buffer = new byte[3]; |
|
44 private int bufsize = 0; // size of the cache |
|
45 private int index = 0; // index into the cache |
|
46 |
|
47 // buffer for almost 8K of typical 76 chars + CRLF lines, |
|
48 // used by getByte method. this buffer contains encoded bytes. |
|
49 private byte[] input_buffer = new byte[78*105]; |
|
50 private int input_pos = 0; |
|
51 private int input_len = 0;; |
|
52 |
|
53 private boolean ignoreErrors = false; |
|
54 |
|
55 /** |
|
56 * Create a BASE64 decoder that decodes the specified input stream. |
|
57 * The System property <code>mail.mime.base64.ignoreerrors</code> |
|
58 * controls whether errors in the encoded data cause an exception |
|
59 * or are ignored. The default is false (errors cause exception). |
|
60 * |
|
61 * @param in the input stream |
|
62 */ |
|
63 public BASE64DecoderStream(InputStream in) { |
|
64 super(in); |
|
65 // default to false |
|
66 ignoreErrors = PropUtil.getBooleanSystemProperty( |
|
67 "mail.mime.base64.ignoreerrors", false); |
|
68 } |
|
69 |
|
70 /** |
|
71 * Create a BASE64 decoder that decodes the specified input stream. |
|
72 * |
|
73 * @param in the input stream |
|
74 * @param ignoreErrors ignore errors in encoded data? |
|
75 */ |
|
76 public BASE64DecoderStream(InputStream in, boolean ignoreErrors) { |
|
77 super(in); |
|
78 this.ignoreErrors = ignoreErrors; |
|
79 } |
|
80 |
|
81 /** |
|
82 * Read the next decoded byte from this input stream. The byte |
|
83 * is returned as an <code>int</code> in the range <code>0</code> |
|
84 * to <code>255</code>. If no byte is available because the end of |
|
85 * the stream has been reached, the value <code>-1</code> is returned. |
|
86 * This method blocks until input data is available, the end of the |
|
87 * stream is detected, or an exception is thrown. |
|
88 * |
|
89 * @return next byte of data, or <code>-1</code> if the end of the |
|
90 * stream is reached. |
|
91 * @exception IOException if an I/O error occurs. |
|
92 * @see java.io.FilterInputStream#in |
|
93 */ |
|
94 @Override |
|
95 public int read() throws IOException { |
|
96 if (index >= bufsize) { |
|
97 bufsize = decode(buffer, 0, buffer.length); |
|
98 if (bufsize <= 0) { |
|
99 return -1; |
|
100 } |
|
101 index = 0; // reset index into buffer |
|
102 } |
|
103 return buffer[index++] & 0xff; // Zero off the MSB |
|
104 } |
|
105 |
|
106 /** |
|
107 * Reads up to <code>len</code> decoded bytes of data from this input stream |
|
108 * into an array of bytes. This method blocks until some input is |
|
109 * available. |
|
110 * <p> |
|
111 * |
|
112 * @param buf the buffer into which the data is read. |
|
113 * @param off the start offset of the data. |
|
114 * @param len the maximum number of bytes read. |
|
115 * @return the total number of bytes read into the buffer, or |
|
116 * <code>-1</code> if there is no more data because the end of |
|
117 * the stream has been reached. |
|
118 * @exception IOException if an I/O error occurs. |
|
119 */ |
|
120 @Override |
|
121 public int read(byte[] buf, int off, int len) throws IOException { |
|
122 // empty out single byte read buffer |
|
123 int off0 = off; |
|
124 while (index < bufsize && len > 0) { |
|
125 buf[off++] = buffer[index++]; |
|
126 len--; |
|
127 } |
|
128 if (index >= bufsize) { |
|
129 bufsize = index = 0; |
|
130 } |
|
131 |
|
132 int bsize = (len / 3) * 3; // round down to multiple of 3 bytes |
|
133 if (bsize > 0) { |
|
134 int size = decode(buf, off, bsize); |
|
135 off += size; |
|
136 len -= size; |
|
137 |
|
138 if (size != bsize) { // hit EOF? |
|
139 if (off == off0) { |
|
140 return -1; |
|
141 } else { |
|
142 return off - off0; |
|
143 } |
|
144 } |
|
145 } |
|
146 |
|
147 // finish up with a partial read if necessary |
|
148 for (; len > 0; len--) { |
|
149 int c = read(); |
|
150 if (c == -1) { |
|
151 break; |
|
152 } |
|
153 buf[off++] = (byte)c; |
|
154 } |
|
155 |
|
156 if (off == off0) { |
|
157 return -1; |
|
158 } else { |
|
159 return off - off0; |
|
160 } |
|
161 } |
|
162 |
|
163 /** |
|
164 * Skips over and discards n bytes of data from this stream. |
|
165 */ |
|
166 @Override |
|
167 public long skip(long n) throws IOException { |
|
168 long skipped = 0; |
|
169 while (n-- > 0 && read() >= 0) { |
|
170 skipped++; |
|
171 } |
|
172 return skipped; |
|
173 } |
|
174 |
|
175 /** |
|
176 * Tests if this input stream supports marks. Currently this class |
|
177 * does not support marks |
|
178 */ |
|
179 @Override |
|
180 public boolean markSupported() { |
|
181 return false; // Maybe later .. |
|
182 } |
|
183 |
|
184 /** |
|
185 * Returns the number of bytes that can be read from this input |
|
186 * stream without blocking. However, this figure is only |
|
187 * a close approximation in case the original encoded stream |
|
188 * contains embedded CRLFs; since the CRLFs are discarded, not decoded |
|
189 */ |
|
190 @Override |
|
191 public int available() throws IOException { |
|
192 // This is only an estimate, since in.available() |
|
193 // might include CRLFs too .. |
|
194 return ((in.available() * 3)/4 + (bufsize-index)); |
|
195 } |
|
196 |
|
197 /** |
|
198 * This character array provides the character to value map |
|
199 * based on RFC1521. |
|
200 */ |
|
201 private final static char pem_array[] = { |
|
202 'A','B','C','D','E','F','G','H', // 0 |
|
203 'I','J','K','L','M','N','O','P', // 1 |
|
204 'Q','R','S','T','U','V','W','X', // 2 |
|
205 'Y','Z','a','b','c','d','e','f', // 3 |
|
206 'g','h','i','j','k','l','m','n', // 4 |
|
207 'o','p','q','r','s','t','u','v', // 5 |
|
208 'w','x','y','z','0','1','2','3', // 6 |
|
209 '4','5','6','7','8','9','+','/' // 7 |
|
210 }; |
|
211 |
|
212 private final static byte pem_convert_array[] = new byte[256]; |
|
213 |
|
214 static { |
|
215 for (int i = 0; i < 255; i++) { |
|
216 pem_convert_array[i] = -1; |
|
217 } |
|
218 for (int i = 0; i < pem_array.length; i++) { |
|
219 pem_convert_array[pem_array[i]] = (byte)i; |
|
220 } |
|
221 } |
|
222 |
|
223 /** |
|
224 * The decoder algorithm. Most of the complexity here is dealing |
|
225 * with error cases. Returns the number of bytes decoded, which |
|
226 * may be zero. Decoding is done by filling an int with 4 6-bit |
|
227 * values by shifting them in from the bottom and then extracting |
|
228 * 3 8-bit bytes from the int by shifting them out from the bottom. |
|
229 * |
|
230 * @param outbuf the buffer into which to put the decoded bytes |
|
231 * @param pos position in the buffer to start filling |
|
232 * @param len the number of bytes to fill |
|
233 * @return the number of bytes filled, always a multiple |
|
234 * of three, and may be zero |
|
235 * @exception IOException if the data is incorrectly formatted |
|
236 */ |
|
237 private int decode(byte[] outbuf, int pos, int len) throws IOException { |
|
238 int pos0 = pos; |
|
239 while (len >= 3) { |
|
240 /* |
|
241 * We need 4 valid base64 characters before we start decoding. |
|
242 * We skip anything that's not a valid base64 character (usually |
|
243 * just CRLF). |
|
244 */ |
|
245 int got = 0; |
|
246 int val = 0; |
|
247 while (got < 4) { |
|
248 int i = getByte(); |
|
249 if (i == -1 || i == -2) { |
|
250 boolean atEOF; |
|
251 if (i == -1) { |
|
252 if (got == 0) { |
|
253 return pos - pos0; |
|
254 } |
|
255 if (!ignoreErrors) { |
|
256 throw new DecodingException( |
|
257 "BASE64Decoder: Error in encoded stream: " + |
|
258 "needed 4 valid base64 characters " + |
|
259 "but only got " + got + " before EOF" + |
|
260 recentChars()); |
|
261 } |
|
262 atEOF = true; // don't read any more |
|
263 } else { // i == -2 |
|
264 // found a padding character, we're at EOF |
|
265 // XXX - should do something to make EOF "sticky" |
|
266 if (got < 2 && !ignoreErrors) { |
|
267 throw new DecodingException( |
|
268 "BASE64Decoder: Error in encoded stream: " + |
|
269 "needed at least 2 valid base64 characters," + |
|
270 " but only got " + got + |
|
271 " before padding character (=)" + |
|
272 recentChars()); |
|
273 } |
|
274 |
|
275 // didn't get any characters before padding character? |
|
276 if (got == 0) { |
|
277 return pos - pos0; |
|
278 } |
|
279 atEOF = false; // need to keep reading |
|
280 } |
|
281 |
|
282 // pad partial result with zeroes |
|
283 |
|
284 // how many bytes will we produce on output? |
|
285 // (got always < 4, so size always < 3) |
|
286 int size = got - 1; |
|
287 if (size == 0) { |
|
288 size = 1; |
|
289 } |
|
290 |
|
291 // handle the one padding character we've seen |
|
292 got++; |
|
293 val <<= 6; |
|
294 |
|
295 while (got < 4) { |
|
296 if (!atEOF) { |
|
297 // consume the rest of the padding characters, |
|
298 // filling with zeroes |
|
299 i = getByte(); |
|
300 if (i == -1) { |
|
301 if (!ignoreErrors) { |
|
302 throw new DecodingException( |
|
303 "BASE64Decoder: Error in encoded " + |
|
304 "stream: hit EOF while looking for " + |
|
305 "padding characters (=)" + |
|
306 recentChars()); |
|
307 } |
|
308 } else if (i != -2) { |
|
309 if (!ignoreErrors) { |
|
310 throw new DecodingException( |
|
311 "BASE64Decoder: Error in encoded " + |
|
312 "stream: found valid base64 " + |
|
313 "character after a padding character " + |
|
314 "(=)" + recentChars()); |
|
315 } |
|
316 } |
|
317 } |
|
318 val <<= 6; |
|
319 got++; |
|
320 } |
|
321 |
|
322 // now pull out however many valid bytes we got |
|
323 val >>= 8; // always skip first one |
|
324 if (size == 2) { |
|
325 outbuf[pos + 1] = (byte)(val & 0xff); |
|
326 } |
|
327 val >>= 8; |
|
328 outbuf[pos] = (byte)(val & 0xff); |
|
329 // len -= size; // not needed, return below |
|
330 pos += size; |
|
331 return pos - pos0; |
|
332 } else { |
|
333 // got a valid byte |
|
334 val <<= 6; |
|
335 got++; |
|
336 val |= i; |
|
337 } |
|
338 } |
|
339 |
|
340 // read 4 valid characters, now extract 3 bytes |
|
341 outbuf[pos + 2] = (byte)(val & 0xff); |
|
342 val >>= 8; |
|
343 outbuf[pos + 1] = (byte)(val & 0xff); |
|
344 val >>= 8; |
|
345 outbuf[pos] = (byte)(val & 0xff); |
|
346 len -= 3; |
|
347 pos += 3; |
|
348 } |
|
349 return pos - pos0; |
|
350 } |
|
351 |
|
352 /** |
|
353 * Read the next valid byte from the input stream. |
|
354 * Buffer lots of data from underlying stream in input_buffer, |
|
355 * for efficiency. |
|
356 * |
|
357 * @return the next byte, -1 on EOF, or -2 if next byte is '=' |
|
358 * (padding at end of encoded data) |
|
359 */ |
|
360 private int getByte() throws IOException { |
|
361 int c; |
|
362 do { |
|
363 if (input_pos >= input_len) { |
|
364 try { |
|
365 input_len = in.read(input_buffer); |
|
366 } catch (EOFException ex) { |
|
367 return -1; |
|
368 } |
|
369 if (input_len <= 0) { |
|
370 return -1; |
|
371 } |
|
372 input_pos = 0; |
|
373 } |
|
374 // get the next byte in the buffer |
|
375 c = input_buffer[input_pos++] & 0xff; |
|
376 // is it a padding byte? |
|
377 if (c == '=') { |
|
378 return -2; |
|
379 } |
|
380 // no, convert it |
|
381 c = pem_convert_array[c]; |
|
382 // loop until we get a legitimate byte |
|
383 } while (c == -1); |
|
384 return c; |
|
385 } |
|
386 |
|
387 /** |
|
388 * Return the most recent characters, for use in an error message. |
|
389 */ |
|
390 private String recentChars() { |
|
391 // reach into the input buffer and extract up to 10 |
|
392 // recent characters, to help in debugging. |
|
393 StringBuilder errstr = new StringBuilder(); |
|
394 int nc = input_pos > 10 ? 10 : input_pos; |
|
395 if (nc > 0) { |
|
396 errstr.append(", the ").append(nc).append(" most recent characters were: \""); |
|
397 for (int k = input_pos - nc; k < input_pos; k++) { |
|
398 char c = (char)(input_buffer[k] & 0xff); |
|
399 switch (c) { |
|
400 case '\r': errstr.append("\\r"); break; |
|
401 case '\n': errstr.append("\\n"); break; |
|
402 case '\t': errstr.append("\\t"); break; |
|
403 default: |
|
404 if (c >= ' ' && c < 0177) { |
|
405 errstr.append(c); |
|
406 } else { |
|
407 errstr.append("\\").append((int)c); |
|
408 } |
|
409 } |
|
410 } |
|
411 errstr.append("\""); |
|
412 } |
|
413 return errstr.toString(); |
|
414 } |
|
415 |
|
416 /** |
|
417 * Base64 decode a byte array. No line breaks are allowed. |
|
418 * This method is suitable for short strings, such as those |
|
419 * in the IMAP AUTHENTICATE protocol, but not to decode the |
|
420 * entire content of a MIME part. |
|
421 * |
|
422 * NOTE: inbuf may only contain valid base64 characters. |
|
423 * Whitespace is not ignored. |
|
424 */ |
|
425 public static byte[] decode(byte[] inbuf) { |
|
426 int size = (inbuf.length / 4) * 3; |
|
427 if (size == 0) { |
|
428 return inbuf; |
|
429 } |
|
430 |
|
431 if (inbuf[inbuf.length - 1] == '=') { |
|
432 size--; |
|
433 if (inbuf[inbuf.length - 2] == '=') { |
|
434 size--; |
|
435 } |
|
436 } |
|
437 byte[] outbuf = new byte[size]; |
|
438 |
|
439 int inpos = 0, outpos = 0; |
|
440 size = inbuf.length; |
|
441 while (size > 0) { |
|
442 int val; |
|
443 int osize = 3; |
|
444 val = pem_convert_array[inbuf[inpos++] & 0xff]; |
|
445 val <<= 6; |
|
446 val |= pem_convert_array[inbuf[inpos++] & 0xff]; |
|
447 val <<= 6; |
|
448 if (inbuf[inpos] != '=') { |
|
449 val |= pem_convert_array[inbuf[inpos++] & 0xff]; |
|
450 } else { |
|
451 osize--; |
|
452 } |
|
453 val <<= 6; |
|
454 if (inbuf[inpos] != '=') { |
|
455 val |= pem_convert_array[inbuf[inpos++] & 0xff]; |
|
456 } else { |
|
457 osize--; |
|
458 } |
|
459 if (osize > 2) { |
|
460 outbuf[outpos + 2] = (byte)(val & 0xff); |
|
461 } |
|
462 val >>= 8; |
|
463 if (osize > 1) { |
|
464 outbuf[outpos + 1] = (byte)(val & 0xff); |
|
465 } |
|
466 val >>= 8; |
|
467 outbuf[outpos] = (byte)(val & 0xff); |
|
468 outpos += osize; |
|
469 size -= 4; |
|
470 } |
|
471 return outbuf; |
|
472 } |
|
473 |
|
474 /*** begin TEST program *** |
|
475 public static void main(String argv[]) throws Exception { |
|
476 FileInputStream infile = new FileInputStream(argv[0]); |
|
477 BASE64DecoderStream decoder = new BASE64DecoderStream(infile); |
|
478 int c; |
|
479 |
|
480 while ((c = decoder.read()) != -1) |
|
481 System.out.print((char)c); |
|
482 System.out.flush(); |
|
483 } |
|
484 *** end TEST program ***/ |
|
485 } |