src/share/jaxws_classes/com/sun/xml/internal/org/jvnet/mimepull/MIMEParser.java

Fri, 04 Oct 2013 16:21:34 +0100

author
mkos
date
Fri, 04 Oct 2013 16:21:34 +0100
changeset 408
b0610cd08440
parent 368
0989ad8c0860
child 637
9c07ef4934dd
permissions
-rw-r--r--

8025054: Update JAX-WS RI integration to 2.2.9-b130926.1035
Reviewed-by: chegar

     1 /*
     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.org.jvnet.mimepull;
    28 import java.io.InputStream;
    29 import java.io.IOException;
    30 import java.util.*;
    31 import java.util.logging.Logger;
    32 import java.nio.ByteBuffer;
    33 import java.util.logging.Level;
/**
 * Pull parser for MIME messages. Applications can use the pull API to
 * continue parsing a MIME message lazily.
 *
 * <p>For example:
 *
 * <pre>
 * MIMEParser parser = ...
 * Iterator&lt;MIMEEvent&gt; it = parser.iterator();
 * while(it.hasNext()) {
 *   MIMEEvent event = it.next();
 *   ...
 * }
 * </pre>
 *
 * @author Jitendra Kotamraju
 */
    53 class MIMEParser implements Iterable<MIMEEvent> {
    55     private static final Logger LOGGER = Logger.getLogger(MIMEParser.class.getName());
    57     private static final String HEADER_ENCODING = "ISO8859-1";
    59     // Actually, the grammar doesn't support whitespace characters
    60     // after boundary. But the mail implementation checks for it.
    61     // We will only check for these many whitespace characters after boundary
    62     private static final int NO_LWSP = 1000;
    63     private enum STATE {START_MESSAGE, SKIP_PREAMBLE, START_PART, HEADERS, BODY, END_PART, END_MESSAGE}
    64     private STATE state = STATE.START_MESSAGE;
    66     private final InputStream in;
    67     private final byte[] bndbytes;
    68     private final int bl;
    69     private final MIMEConfig config;
    70     private final int[] bcs = new int[128]; // BnM algo: Bad Character Shift table
    71     private final int[] gss;                // BnM algo : Good Suffix Shift table
    73     /**
    74      * Have we parsed the data from our InputStream yet?
    75      */
    76     private boolean parsed;
    78     /*
    79      * Read and process body partsList until we see the
    80      * terminating boundary line (or EOF).
    81      */
    82     private boolean done = false;
    84     private boolean eof;
    85     private final int capacity;
    86     private byte[] buf;
    87     private int len;
    88     private boolean bol;        // beginning of the line
    /**
     * Creates a parser for the MIME message carried by the given stream.
     * Nothing is read here; data is pulled in as parsing events are
     * requested, and the stream is closed when its end is reached.
     *
     * @param in stream containing the MIME message
     * @param boundary boundary parameter of the message, without the
     *        leading "--"
     * @param config supplies the chunk size used to size the read buffer
     */
    MIMEParser(InputStream in, String boundary, MIMEConfig config) {
        this.in = in;
        this.config = config;

        // The delimiter searched for is "--" followed by the boundary.
        this.bndbytes = getBytes("--" + boundary);
        this.bl = this.bndbytes.length;
        this.gss = new int[this.bl];
        compileBoundaryPattern();

        // One chunk plus room for: \r\n + boundary + "--\r\n" + lots of LWSP
        this.capacity = config.chunkSize + 2 + this.bl + 4 + NO_LWSP;
        createBuf(this.capacity);
    }
   106     /**
   107      * Returns iterator for the parsing events. Use the iterator to advance
   108      * the parsing.
   109      *
   110      * @return iterator for parsing events
   111      */
   112     @Override
   113     public Iterator<MIMEEvent> iterator() {
   114         return new MIMEEventIterator();
   115     }
   117     class MIMEEventIterator implements Iterator<MIMEEvent> {
   119         @Override
   120         public boolean hasNext() {
   121             return !parsed;
   122         }
   124         @Override
   125         public MIMEEvent next() {
   126             switch(state) {
   127                 case START_MESSAGE :
   128                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_MESSAGE);}
   129                     state = STATE.SKIP_PREAMBLE;
   130                     return MIMEEvent.START_MESSAGE;
   132                 case SKIP_PREAMBLE :
   133                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.SKIP_PREAMBLE);}
   134                     skipPreamble();
   135                     // fall through
   136                 case START_PART :
   137                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.START_PART);}
   138                     state = STATE.HEADERS;
   139                     return MIMEEvent.START_PART;
   141                 case HEADERS :
   142                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.HEADERS);}
   143                     InternetHeaders ih = readHeaders();
   144                     state = STATE.BODY;
   145                     bol = true;
   146                     return new MIMEEvent.Headers(ih);
   148                 case BODY :
   149                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.BODY);}
   150                     ByteBuffer buf = readBody();
   151                     bol = false;
   152                     return new MIMEEvent.Content(buf);
   154                 case END_PART :
   155                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_PART);}
   156                     if (done) {
   157                         state = STATE.END_MESSAGE;
   158                     } else {
   159                         state = STATE.START_PART;
   160                     }
   161                     return MIMEEvent.END_PART;
   163                 case END_MESSAGE :
   164                     if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "MIMEParser state={0}", STATE.END_MESSAGE);}
   165                     parsed = true;
   166                     return MIMEEvent.END_MESSAGE;
   168                 default :
   169                     throw new MIMEParsingException("Unknown Parser state = "+state);
   170             }
   171         }
   173         @Override
   174         public void remove() {
   175             throw new UnsupportedOperationException();
   176         }
   177     }
   179     /**
   180      * Collects the headers for the current part by parsing mesage stream.
   181      *
   182      * @return headers for the current part
   183      */
   184     private InternetHeaders readHeaders() {
   185         if (!eof) {
   186             fillBuf();
   187         }
   188         return new InternetHeaders(new LineInputStream());
   189     }
   191     /**
   192      * Reads and saves the part of the current attachment part's content.
   193      * At the end of this method, buf should have the remaining data
   194      * at index 0.
   195      *
   196      * @return a chunk of the part's content
   197      *
   198      */
   199     private ByteBuffer readBody() {
   200         if (!eof) {
   201             fillBuf();
   202         }
   203         int start = match(buf, 0, len);     // matches boundary
   204         if (start == -1) {
   205             // No boundary is found
   206             assert eof || len >= config.chunkSize;
   207             int chunkSize = eof ? len : config.chunkSize;
   208             if (eof) {
   209                 done = true;
   210                 throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
   211             }
   212             return adjustBuf(chunkSize, len-chunkSize);
   213         }
   214         // Found boundary.
   215         // Is it at the start of a line ?
   216         int chunkLen = start;
   217         if (bol && start == 0) {
   218             // nothing to do
   219         } else if (start > 0 && (buf[start-1] == '\n' || buf[start-1] =='\r')) {
   220             --chunkLen;
   221             if (buf[start-1] == '\n' && start >1 && buf[start-2] == '\r') {
   222                 --chunkLen;
   223             }
   224         } else {
   225            return adjustBuf(start+1, len-start-1);  // boundary is not at beginning of a line
   226         }
   228         if (start+bl+1 < len && buf[start+bl] == '-' && buf[start+bl+1] == '-') {
   229             state = STATE.END_PART;
   230             done = true;
   231             return adjustBuf(chunkLen, 0);
   232         }
   234         // Consider all the whitespace in boundary+whitespace+"\r\n"
   235         int lwsp = 0;
   236         for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
   237             ++lwsp;
   238         }
   240         // Check for \n or \r\n in boundary+whitespace+"\n" or boundary+whitespace+"\r\n"
   241         if (start+bl+lwsp < len && buf[start+bl+lwsp] == '\n') {
   242             state = STATE.END_PART;
   243             return adjustBuf(chunkLen, len-start-bl-lwsp-1);
   244         } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp] == '\r' && buf[start+bl+lwsp+1] == '\n') {
   245             state = STATE.END_PART;
   246             return adjustBuf(chunkLen, len-start-bl-lwsp-2);
   247         } else if (start+bl+lwsp+1 < len) {
   248             return adjustBuf(chunkLen+1, len-chunkLen-1);       // boundary string in a part data
   249         } else if (eof) {
   250             done = true;
   251             throw new MIMEParsingException("Reached EOF, but there is no closing MIME boundary.");
   252         }
   254         // Some more data needed to determine if it is indeed a proper boundary
   255         return adjustBuf(chunkLen, len-chunkLen);
   256     }
   258     /**
   259      * Returns a chunk from the original buffer. A new buffer is
   260      * created with the remaining bytes.
   261      *
   262      * @param chunkSize create a chunk with these many bytes
   263      * @param remaining bytes from the end of the buffer that need to be copied to
   264      *        the beginning of the new buffer
   265      * @return chunk
   266      */
   267     private ByteBuffer adjustBuf(int chunkSize, int remaining) {
   268         assert buf != null;
   269         assert chunkSize >= 0;
   270         assert remaining >= 0;
   272         byte[] temp = buf;
   273         // create a new buf and adjust it without this chunk
   274         createBuf(remaining);
   275         System.arraycopy(temp, len-remaining, buf, 0, remaining);
   276         len = remaining;
   278         return ByteBuffer.wrap(temp, 0, chunkSize);
   279     }
   281     private void createBuf(int min) {
   282         buf = new byte[min < capacity ? capacity : min];
   283     }
   285     /**
   286      * Skips the preamble to find the first attachment part
   287      */
   288     private void skipPreamble() {
   290         while(true) {
   291             if (!eof) {
   292                 fillBuf();
   293             }
   294             int start = match(buf, 0, len);     // matches boundary
   295             if (start == -1) {
   296                 // No boundary is found
   297                 if (eof) {
   298                     throw new MIMEParsingException("Missing start boundary");
   299                 } else {
   300                     adjustBuf(len-bl+1, bl-1);
   301                     continue;
   302                 }
   303             }
   305             if (start > config.chunkSize) {
   306                 adjustBuf(start, len-start);
   307                 continue;
   308             }
   309             // Consider all the whitespace boundary+whitespace+"\r\n"
   310             int lwsp = 0;
   311             for(int i=start+bl; i < len && (buf[i] == ' ' || buf[i] == '\t'); i++) {
   312                 ++lwsp;
   313             }
   314             // Check for \n or \r\n
   315             if (start+bl+lwsp < len && (buf[start+bl+lwsp] == '\n' || buf[start+bl+lwsp] == '\r') ) {
   316                 if (buf[start+bl+lwsp] == '\n') {
   317                     adjustBuf(start+bl+lwsp+1, len-start-bl-lwsp-1);
   318                     break;
   319                 } else if (start+bl+lwsp+1 < len && buf[start+bl+lwsp+1] == '\n') {
   320                     adjustBuf(start+bl+lwsp+2, len-start-bl-lwsp-2);
   321                     break;
   322                 }
   323             }
   324             adjustBuf(start+1, len-start-1);
   325         }
   326         if (LOGGER.isLoggable(Level.FINE)) {LOGGER.log(Level.FINE, "Skipped the preamble. buffer len={0}", len);}
   327     }
   329     private static byte[] getBytes(String s) {
   330         char [] chars= s.toCharArray();
   331         int size = chars.length;
   332         byte[] bytes = new byte[size];
   334         for (int i = 0; i < size;) {
   335             bytes[i] = (byte) chars[i++];
   336         }
   337         return bytes;
   338     }
   340         /**
   341      * Boyer-Moore search method. Copied from java.util.regex.Pattern.java
   342      *
   343      * Pre calculates arrays needed to generate the bad character
   344      * shift and the good suffix shift. Only the last seven bits
   345      * are used to see if chars match; This keeps the tables small
   346      * and covers the heavily used ASCII range, but occasionally
   347      * results in an aliased match for the bad character shift.
   348      */
   349     private void compileBoundaryPattern() {
   350         int i, j;
   352         // Precalculate part of the bad character shift
   353         // It is a table for where in the pattern each
   354         // lower 7-bit value occurs
   355         for (i = 0; i < bndbytes.length; i++) {
   356             bcs[bndbytes[i]&0x7F] = i + 1;
   357         }
   359         // Precalculate the good suffix shift
   360         // i is the shift amount being considered
   361 NEXT:   for (i = bndbytes.length; i > 0; i--) {
   362             // j is the beginning index of suffix being considered
   363             for (j = bndbytes.length - 1; j >= i; j--) {
   364                 // Testing for good suffix
   365                 if (bndbytes[j] == bndbytes[j-i]) {
   366                     // src[j..len] is a good suffix
   367                     gss[j-1] = i;
   368                 } else {
   369                     // No match. The array has already been
   370                     // filled up with correct values before.
   371                     continue NEXT;
   372                 }
   373             }
   374             // This fills up the remaining of optoSft
   375             // any suffix can not have larger shift amount
   376             // then its sub-suffix. Why???
   377             while (j > 0) {
   378                 gss[--j] = i;
   379             }
   380         }
   381         // Set the guard value because of unicode compression
   382         gss[bndbytes.length -1] = 1;
   383     }
   385     /**
   386      * Finds the boundary in the given buffer using Boyer-Moore algo.
   387      * Copied from java.util.regex.Pattern.java
   388      *
   389      * @param mybuf boundary to be searched in this mybuf
   390      * @param off start index in mybuf
   391      * @param len number of bytes in mybuf
   392      *
   393      * @return -1 if there is no match or index where the match starts
   394      */
   395     private int match(byte[] mybuf, int off, int len) {
   396         int last = len - bndbytes.length;
   398         // Loop over all possible match positions in text
   399 NEXT:   while (off <= last) {
   400             // Loop over pattern from right to left
   401             for (int j = bndbytes.length - 1; j >= 0; j--) {
   402                 byte ch = mybuf[off+j];
   403                 if (ch != bndbytes[j]) {
   404                     // Shift search to the right by the maximum of the
   405                     // bad character shift and the good suffix shift
   406                     off += Math.max(j + 1 - bcs[ch&0x7F], gss[j]);
   407                     continue NEXT;
   408                 }
   409             }
   410             // Entire pattern matched starting at off
   411             return off;
   412         }
   413         return -1;
   414     }
   416     /**
   417      * Fills the remaining buf to the full capacity
   418      */
   419     private void fillBuf() {
   420         if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "Before fillBuf() buffer len={0}", len);}
   421         assert !eof;
   422         while(len < buf.length) {
   423             int read;
   424             try {
   425                 read = in.read(buf, len, buf.length-len);
   426             } catch(IOException ioe) {
   427                 throw new MIMEParsingException(ioe);
   428             }
   429             if (read == -1) {
   430                 eof = true;
   431                 try {
   432                     if (LOGGER.isLoggable(Level.FINE)) {LOGGER.fine("Closing the input stream.");}
   433                     in.close();
   434                 } catch(IOException ioe) {
   435                     throw new MIMEParsingException(ioe);
   436                 }
   437                 break;
   438             } else {
   439                 len += read;
   440             }
   441         }
   442         if (LOGGER.isLoggable(Level.FINER)) {LOGGER.log(Level.FINER, "After fillBuf() buffer len={0}", len);}
   443     }
   445     private void doubleBuf() {
   446         byte[] temp = new byte[2*len];
   447         System.arraycopy(buf, 0, temp, 0, len);
   448         buf = temp;
   449         if (!eof) {
   450             fillBuf();
   451         }
   452     }
   454     class LineInputStream {
   455         private int offset;
   457         /*
   458          * Read a line containing only ASCII characters from the input
   459          * stream. A line is terminated by a CR or NL or CR-NL sequence.
   460          * A common error is a CR-CR-NL sequence, which will also terminate
   461          * a line.
   462          * The line terminator is not returned as part of the returned
   463          * String. Returns null if no data is available. <p>
   464          *
   465          * This class is similar to the deprecated
   466          * <code>DataInputStream.readLine()</code>
   467          */
   468         public String readLine() throws IOException {
   470             int hdrLen = 0;
   471             int lwsp = 0;
   472             while(offset+hdrLen < len) {
   473                 if (buf[offset+hdrLen] == '\n') {
   474                     lwsp = 1;
   475                     break;
   476                 }
   477                 if (offset+hdrLen+1 == len) {
   478                     doubleBuf();
   479                 }
   480                 if (offset+hdrLen+1 >= len) {   // No more data in the stream
   481                     assert eof;
   482                     return null;
   483                 }
   484                 if (buf[offset+hdrLen] == '\r' && buf[offset+hdrLen+1] == '\n') {
   485                     lwsp = 2;
   486                     break;
   487                 }
   488                 ++hdrLen;
   489             }
   490             if (hdrLen == 0) {
   491                 adjustBuf(offset+lwsp, len-offset-lwsp);
   492                 return null;
   493             }
   495             String hdr = new String(buf, offset, hdrLen, HEADER_ENCODING);
   496             offset += hdrLen+lwsp;
   497             return hdr;
   498         }
   500     }
   502 }

mercurial