Fri, 04 Oct 2013 16:21:34 +0100
8025054: Update JAX-WS RI integration to 2.2.9-b130926.1035
Reviewed-by: chegar
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.xml.internal.org.jvnet.mimepull;
28 import java.io.InputStream;
29 import java.io.IOException;
30 import java.util.*;
31 import java.util.logging.Logger;
32 import java.nio.ByteBuffer;
33 import java.util.logging.Level;
/**
 * Pull parser for MIME messages. Applications use the pull API to parse a
 * MIME message lazily, one event at a time.
 *
 * <p>For example:
 * <pre>
 * MIMEParser parser = ...
 * Iterator&lt;MIMEEvent&gt; it = parser.iterator();
 * while(it.hasNext()) {
 *     MIMEEvent event = it.next();
 *     ...
 * }
 * </pre>
 *
 * @author Jitendra Kotamraju
 */
53 class MIMEParser implements Iterable<MIMEEvent> {
55 private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
57 private static final String HEADER_ENCODING = "ISO8859-1";
59 // Actually, the grammar doesn't support whitespace characters
60 // after boundary. But the mail implementation checks for it.
61 // We will only check for these many whitespace characters after boundary
62 private static final int NO_LWSP = 1000;
63 private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
64 private STATE state = STATE.START_MESSAGE;
66 private final InputStream in;
67 private final byte[] bndbytes;
68 private final int bl;
69 private final MIMEConfig config;
70 private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
71 private final int[] gss; // BnM algo : Good Suffix Shift table
73 /**
74 * Have we parsed the data from our InputStream yet?
75 */
76 private boolean parsed;
78 /*
79 * Read and process body partsList until we see the
80 * terminating boundary line (or EOF).
81 */
82 private boolean done = false;
84 private boolean eof;
85 private final int capacity;
86 private byte[] buf;
87 private int len;
88 private boolean bol; // beginning of the line
/*
 * Prepares a parser for the MIME content of the given stream. Nothing is
 * read here; parsing happens as the event iterator is advanced. Once the
 * end of the stream is reached, the stream is closed as well.
 */
MIMEParser(InputStream in, String boundary, MIMEConfig config) {
    this.in = in;
    this.config = config;

    // Delimiter lines consist of "--" followed by the boundary string
    this.bndbytes = getBytes("--" + boundary);
    this.bl = this.bndbytes.length;
    this.gss = new int[this.bl];
    compileBoundaryPattern();

    // one chunk + "\r\n" + boundary + "--\r\n" + lots of LWSP
    this.capacity = config.chunkSize + 2 + this.bl + 4 + NO_LWSP;
    createBuf(this.capacity);
}
106 /**
107 * Returns iterator for the parsing events. Use the iterator to advance
108 * the parsing.
109 *
110 * @return iterator for parsing events
111 */
112 @Override
113 public Iterator<MIMEEvent> iterator() {
114 return new MIMEEventIterator();
115 }
117 class MIMEEventIterator implements Iterator<MIMEEvent> {
119 @Override
120 public boolean hasNext() {
121 return !parsed;
122 }
124 @Override
125 public MIMEEvent next() {
126 switch(state) {
127 case START_MESSAGE :
128 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_MESSAGE);}
129 state = STATE.SKIP_PREAMBLE;
130 return MIMEEvent.START_MESSAGE;
132 case SKIP_PREAMBLE :
133 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.SKIP_PREAMBLE);}
134 skipPreamble();
135 // fall through
136 case START_PART :
137 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_PART);}
138 state = STATE.HEADERS;
139 return MIMEEvent.START_PART;
141 case HEADERS :
142 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.HEADERS);}
143 InternetHeaders ih = readHeaders();
144 state = STATE.BODY;
145 bol = true;
146 return new MIMEEvent.Headers(ih);
148 case BODY :
149 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.BODY);}
150 ByteBuffer buf = readBody();
151 bol = false;
152 return new MIMEEvent.Content(buf);
154 case END_PART :
155 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_PART);}
156 if (done) {
157 state = STATE.END_MESSAGE;
158 } else {
159 state = STATE.START_PART;
160 }
161 return MIMEEvent.END_PART;
163 case END_MESSAGE :
164 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_MESSAGE);}
165 parsed = true;
166 return MIMEEvent.END_MESSAGE;
168 default :
169 throw new MIMEParsingException("Unknown Parser state = "+state);
170 }
171 }
173 @Override
174 public void remove() {
175 throw new UnsupportedOperationException();
176 }
177 }
179 /**
180 * Collects the headers for the current part by parsing mesage stream.
181 *
182 * @return headers for the current part
183 */
184 private InternetHeaders readHeaders() {
185 if (!eof) {
186 fillBuf();
187 }
188 return new InternetHeaders(new LineInputStream());
189 }
191 /**
192 * Reads and saves the part of the current attachment part's content.
193 * At the end of this method, buf should have the remaining data
194 * at index 0.
195 *
196 * @return a chunk of the part's content
197 *
198 */
199 private ByteBuffer readBody() {
200 if (!eof) {
201 fillBuf();
202 }
203 int start = match(buf, 0, len); // matches boundary
204 if (start == -1) {
205 // No boundary is found
206 assert eof || len >= config.chunkSize;
207 int chunkSize = eof ? len : config.chunkSize;
208 if (eof) {
209 done = true;
210 throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
211 }
212 return adjustBuf(chunkSize, len-chunkSize);
213 }
214 // Found boundary.
215 // Is it at the start of a line ?
216 int chunkLen = start;
217 if (bol && start == 0) {
218 // nothing to do
219 } else if (start > 0 && (buf[start-1] == '\n' || buf[start-1] =='\r')) {
220 --chunkLen;
221 if (buf[start-1] == '\n' && start >1 && buf[start-2] == '\r') {
222 --chunkLen;
223 }
224 } else {
225 return adjustBuf(start+1, len-start-1); // boundary is not at beginning of a line
226 }
228 if (start+bl+1 < len && buf[start+bl] == '-' && buf[start+bl+1] == '-') {
229 state = STATE.END_PART;
230 done = true;
231 return adjustBuf(chunkLen, 0);
232 }
234 // Consider all the whitespace in boundary+whitespace+"\r\n"
235 int lwsp = 0;
236 for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
237 ++lwsp;
238 }
240 // Check for \n or \r\n in boundary+whitespace+"\n" or boundary+whitespace+"\r\n"
241 if (start+bl+lwsp < len && buf[start+bl+lwsp] == '\n') {
242 state = STATE.END_PART;
243 return adjustBuf(chunkLen, len-start-bl-lwsp-1);
244 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp] == '\r' && buf[start+bl+lwsp+1] == '\n') {
245 state = STATE.END_PART;
246 return adjustBuf(chunkLen, len-start-bl-lwsp-2);
247 } else if (start+bl+lwsp+1 < len) {
248 return adjustBuf(chunkLen+1, len-chunkLen-1); // boundary string in a part data
249 } else if (eof) {
250 done = true;
251 throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
252 }
254 // Some more data needed to determine if it is indeed a proper boundary
255 return adjustBuf(chunkLen, len-chunkLen);
256 }
258 /**
259 * Returns a chunk from the original buffer. A new buffer is
260 * created with the remaining bytes.
261 *
262 * @param chunkSize create a chunk with these many bytes
263 * @param remaining bytes from the end of the buffer that need to be copied to
264 * the beginning of the new buffer
265 * @return chunk
266 */
267 private ByteBuffer adjustBuf(int chunkSize, int remaining) {
268 assert buf != null;
269 assert chunkSize >= 0;
270 assert remaining >= 0;
272 byte[] temp = buf;
273 // create a new buf and adjust it without this chunk
274 createBuf(remaining);
275 System.arraycopy(temp, len-remaining, buf, 0, remaining);
276 len = remaining;
278 return ByteBuffer.wrap(temp, 0, chunkSize);
279 }
281 private void createBuf(int min) {
282 buf = new byte[min < capacity ? capacity : min];
283 }
285 /**
286 * Skips the preamble to find the first attachment part
287 */
288 private void skipPreamble() {
290 while(true) {
291 if (!eof) {
292 fillBuf();
293 }
294 int start = match(buf, 0, len); // matches boundary
295 if (start == -1) {
296 // No boundary is found
297 if (eof) {
298 throw new MIMEParsingException("Missing start boundary");
299 } else {
300 adjustBuf(len-bl+1, bl-1);
301 continue;
302 }
303 }
305 if (start > config.chunkSize) {
306 adjustBuf(start, len-start);
307 continue;
308 }
309 // Consider all the whitespace boundary+whitespace+"\r\n"
310 int lwsp = 0;
311 for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
312 ++lwsp;
313 }
314 // Check for \n or \r\n
315 if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
316 if (buf[start+bl+lwsp] == '\n') {
317 adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
318 break;
319 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
320 adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
321 break;
322 }
323 }
324 adjustBuf(start+1, len-start-1);
325 }
326 if (LOGGER.isLoggable(Level.FINE)) {LOGGER.log(Level.FINE, "Skipped the preamble. buffer len={0}", len);}
327 }
329 private static byte[] getBytes(String s) {
330 char [] chars= s.toCharArray();
331 int size = chars.length;
332 byte[] bytes = new byte[size];
334 for (int i = 0; i < size;) {
335 bytes[i] = (byte) chars[i++];
336 }
337 return bytes;
338 }
340 /**
341 * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
342 *
343 * Pre calculates arrays needed to generate the bad character
344 * shift and the good suffix shift. Only the last seven bits
345 * are used to see if chars match; This keeps the tables small
346 * and covers the heavily used ASCII range, but occasionally
347 * results in an aliased match for the bad character shift.
348 */
349 private void compileBoundaryPattern() {
350 int i, j;
352 // Precalculate part of the bad character shift
353 // It is a table for where in the pattern each
354 // lower 7-bit value occurs
355 for (i = 0; i < bndbytes.length; i++) {
356 bcs[bndbytes[i]&0x7F] = i + 1;
357 }
359 // Precalculate the good suffix shift
360 // i is the shift amount being considered
361 NEXT: for (i = bndbytes.length; i > 0; i--) {
362 // j is the beginning index of suffix being considered
363 for (j = bndbytes.length - 1; j >= i; j--) {
364 // Testing for good suffix
365 if (bndbytes[j] == bndbytes[j-i]) {
366 // src[j..len] is a good suffix
367 gss[j-1] = i;
368 } else {
369 // No match. The array has already been
370 // filled up with correct values before.
371 continue NEXT;
372 }
373 }
374 // This fills up the remaining of optoSft
375 // any suffix can not have larger shift amount
376 // then its sub-suffix. Why???
377 while (j > 0) {
378 gss[--j] = i;
379 }
380 }
381 // Set the guard value because of unicode compression
382 gss[bndbytes.length -1] = 1;
383 }
385 /**
386 * Finds the boundary in the given buffer using Boyer-Moore algo.
387 * Copied from java.util.regex.Pattern.java
388 *
389 * @param mybuf boundary to be searched in this mybuf
390 * @param off start index in mybuf
391 * @param len number of bytes in mybuf
392 *
393 * @return -1 if there is no match or index where the match starts
394 */
395 private int match(byte[] mybuf, int off, int len) {
396 int last = len - bndbytes.length;
398 // Loop over all possible match positions in text
399 NEXT: while (off <= last) {
400 // Loop over pattern from right to left
401 for (int j = bndbytes.length - 1; j >= 0; j--) {
402 byte ch = mybuf[off+j];
403 if (ch != bndbytes[j]) {
404 // Shift search to the right by the maximum of the
405 // bad character shift and the good suffix shift
406 off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
407 continue NEXT;
408 }
409 }
410 // Entire pattern matched starting at off
411 return off;
412 }
413 return -1;
414 }
416 /**
417 * Fills the remaining buf to the full capacity
418 */
419 private void fillBuf() {
420 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "Before fillBuf() buffer len={0}", len);}
421 assert !eof;
422 while(len < buf.length) {
423 int read;
424 try {
425 read = in.read(buf, len, buf.length-len);
426 } catch(IOException ioe) {
427 throw new MIMEParsingException(ioe);
428 }
429 if (read == -1) {
430 eof = true;
431 try {
432 if (LOGGER.isLoggable(Level.FINE)) {LOGGER.fine("Closing the input stream.");}
433 in.close();
434 } catch(IOException ioe) {
435 throw new MIMEParsingException(ioe);
436 }
437 break;
438 } else {
439 len += read;
440 }
441 }
442 if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "After fillBuf() buffer len={0}", len);}
443 }
445 private void doubleBuf() {
446 byte[] temp = new byte[2*len];
447 System.arraycopy(buf, 0, temp, 0, len);
448 buf = temp;
449 if (!eof) {
450 fillBuf();
451 }
452 }
454 class LineInputStream {
455 private int offset;
457 /*
458 * Read a line containing only ASCII characters from the input
459 * stream. A line is terminated by a CR or NL or CR-NL sequence.
460 * A common error is a CR-CR-NL sequence, which will also terminate
461 * a line.
462 * The line terminator is not returned as part of the returned
463 * String. Returns null if no data is available. <p>
464 *
465 * This class is similar to the deprecated
466 * <code>DataInputStream.readLine()</code>
467 */
468 public String readLine() throws IOException {
470 int hdrLen = 0;
471 int lwsp = 0;
472 while(offset+hdrLen < len) {
473 if (buf[offset+hdrLen] == '\n') {
474 lwsp = 1;
475 break;
476 }
477 if (offset+hdrLen+1 == len) {
478 doubleBuf();
479 }
480 if (offset+hdrLen+1 >= len) { // No more data in the stream
481 assert eof;
482 return null;
483 }
484 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
485 lwsp = 2;
486 break;
487 }
488 ++hdrLen;
489 }
490 if (hdrLen == 0) {
491 adjustBuf(offset+lwsp, len-offset-lwsp);
492 return null;
493 }
495 String hdr = new String(buf, offset, hdrLen, HEADER_ENCODING);
496 offset += hdrLen+lwsp;
497 return hdr;
498 }
500 }
502 }