jdk8-mips64-public/jaxws: src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java@9c07ef4934dd

     1 /*

     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 /*

    27  * @(#)MimeUtility.java       1.45 03/03/10

    28  */

    32 package com.sun.xml.internal.messaging.saaj.packaging.mime.internet;

    34 import java.io.*;

    35 import java.util.*;

    37 import javax.activation.DataHandler;

    38 import javax.activation.DataSource;

    40 import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;

    41 import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*;

    42 import com.sun.xml.internal.messaging.saaj.util.SAAJUtil;

    44 /**

    45  * This is a utility class that provides various MIME related

    46  * functionality. <p>

    47  *

    48  * There are a set of methods to encode and decode MIME headers as

    49  * per RFC 2047. A brief description on handling such headers is

    50  * given below: <p>

    51  *

    52  * RFC 822 mail headers <strong>must</strong> contain only US-ASCII

    53  * characters. Headers that contain non US-ASCII characters must be

    54  * encoded so that they contain only US-ASCII characters. Basically,

    55  * this process involves using either BASE64 or QP to encode certain

    56  * characters. RFC 2047 describes this in detail. <p>

    57  *

    58  * In Java, Strings contain (16 bit) Unicode characters. ASCII is a

    59  * subset of Unicode (and occupies the range 0 - 127). A String

    60  * that contains only ASCII characters is already mail-safe. If the

    61  * String contains non US-ASCII characters, it must be encoded. An

    62  * additional complexity in this step is that since Unicode is not

    63  * yet a widely used charset, one might want to first charset-encode

    64  * the String into another charset and then do the transfer-encoding.

    65  * <p>

    66  * Note that to get the actual bytes of a mail-safe String (say,

    67  * for sending over SMTP), one must do

    68  * <p><blockquote><pre>

    69  *

    70  *      byte[] bytes = string.getBytes("iso-8859-1");

    71  *

    72  * </pre></blockquote><p>

    73  *

    74  * The <code>setHeader</code> and <code>addHeader</code> methods

    75  * on MimeMessage and MimeBodyPart assume that the given header values

    76  * are Unicode strings that contain only US-ASCII characters. Hence

    77  * the callers of those methods must ensure that the values they pass

    78  * do not contain non US-ASCII characters. The methods in this class

    79  * help do this. <p>

    80  *

    81  * The <code>getHeader</code> family of methods on MimeMessage and

    82  * MimeBodyPart return the raw header value. These might be encoded

    83  * as per RFC 2047, and if so, must be decoded into Unicode Strings.

    84  * The methods in this class help to do this. <p>

    85  *

    86  * Several System properties control strict conformance to the MIME

    87  * spec.  Note that these are not session properties but must be set

    88  * globally as System properties. <p>

    89  *

    90  * The <code>mail.mime.decodetext.strict</code> property controls

    91  * decoding of MIME encoded words.  The MIME spec requires that encoded

    92  * words start at the beginning of a whitespace separated word.  Some

    93  * mailers incorrectly include encoded words in the middle of a word.

    94  * If the <code>mail.mime.decodetext.strict</code> System property is

    95  * set to <code>"false"</code>, an attempt will be made to decode these

    96  * illegal encoded words. The default is true. <p>

    97  *

    98  * The <code>mail.mime.encodeeol.strict</code> property controls the

    99  * choice of Content-Transfer-Encoding for MIME parts that are not of

   100  * type "text".  Often such parts will contain textual data for which

   101  * an encoding that allows normal end of line conventions is appropriate.

   102  * In rare cases, such a part will appear to contain entirely textual

   103  * data, but will require an encoding that preserves CR and LF characters

   104  * without change.  If the <code>mail.mime.encodeeol.strict</code>

   105  * System property is set to <code>"true"</code>, such an encoding will

   106  * be used when necessary.  The default is false. <p>

   107  *

   108  * In addition, the <code>mail.mime.charset</code> System property can

   109  * be used to specify the default MIME charset to use for encoded words

   110  * and text parts that don't otherwise specify a charset.  Normally, the

   111  * default MIME charset is derived from the default Java charset, as

   112  * specified in the <code>file.encoding</code> System property.  Most

   113  * applications will have no need to explicitly set the default MIME

   114  * charset.  In cases where the default MIME charset to be used for

   115  * mail messages is different than the charset used for files stored on

   116  * the system, this property should be set.

   117  *

   118  * @version 1.45, 03/03/10

   119  * @author  John Mani

   120  * @author  Bill Shannon

   121  */

   123 public class MimeUtility {

   // Pure static utility holder: the private constructor forbids instantiation.
   private MimeUtility() { }

   // Passed by getEncoding(DataSource) as the int argument of checkAscii;
   // presumably means "examine all bytes" -- confirm against checkAscii.
   public static final int ALL = -1;

   // Initial capacity of the ByteArrayOutputStream used by doEncode.
   private static final int BUFFER_SIZE = 1024;

   // Behaviour switches. The values below are the defaults; the static
   // initializer overrides them from System properties when readable.
   private static boolean decodeStrict = true;      // mail.mime.decodetext.strict
   private static boolean encodeEolStrict = false;  // mail.mime.encodeeol.strict
   private static boolean foldEncodedWords = false; // mail.mime.foldencodedwords
   private static boolean foldText = true;          // mail.mime.foldtext

   static {
       try {
           // "Default true" flags: only an explicit (case-insensitive)
           // "false" turns them off. "Default false" flags: only an explicit
           // "true" turns them on. The constant-first equalsIgnoreCase call
           // is null-safe, so an unset property yields the default.
           decodeStrict = !"false".equalsIgnoreCase(
                   SAAJUtil.getSystemProperty("mail.mime.decodetext.strict"));
           encodeEolStrict = "true".equalsIgnoreCase(
                   SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict"));
           foldEncodedWords = "true".equalsIgnoreCase(
                   SAAJUtil.getSystemProperty("mail.mime.foldencodedwords"));
           foldText = !"false".equalsIgnoreCase(
                   SAAJUtil.getSystemProperty("mail.mime.foldtext"));
       } catch (SecurityException ignored) {
           // Not allowed to read System properties: flags assigned so far
           // keep their new value, the remaining ones keep their defaults.
       }
   }

   156     /**

   157      * Get the content-transfer-encoding that should be applied

   158      * to the input stream of this datasource, to make it mailsafe. <p>

   159      *

   160      * The algorithm used here is: <br>

   161      * <ul>

   162      * <li>

   163      * If the primary type of this datasource is "text" and if all

   164      * the bytes in its input stream are US-ASCII, then the encoding

   165      * is "7bit". If more than half of the bytes are non-US-ASCII, then

   166      * the encoding is "base64". If less than half of the bytes are

   167      * non-US-ASCII, then the encoding is "quoted-printable".

   168      * <li>

   169      * If the primary type of this datasource is not "text", then if

   170      * all the bytes of its input stream are US-ASCII, the encoding

   171      * is "7bit". If there is even one non-US-ASCII character, the

   172      * encoding is "base64".

   173      * </ul>

   174      *

   175      * @param   ds      DataSource

   176      * @return          the encoding. This is either "7bit",

   177      *                  "quoted-printable" or "base64"

   178      */

   179     public static String getEncoding(DataSource ds) {

   180         ContentType cType = null;

   181         InputStream is = null;

   182         String encoding = null;

   184         try {

   185             cType = new ContentType(ds.getContentType());

   186             is = ds.getInputStream();

   187         } catch (Exception ex) {

   188             return "base64"; // what else ?!

   189         }

   191         boolean isText = cType.match("text/*");

   192         // if not text, stop processing when we see non-ASCII

   193         int i = checkAscii(is, ALL, !isText);

   194         switch (i) {

   195         case ALL_ASCII:

   196             encoding = "7bit"; // all ascii

   197             break;

   198         case MOSTLY_ASCII:

   199             encoding = "quoted-printable"; // mostly ascii

   200             break;

   201         default:

   202             encoding = "base64"; // mostly binary

   203             break;

   204         }

   206         // Close the input stream

   207         try {

   208             is.close();

   209         } catch (IOException ioex) { }

   211         return encoding;

   212     }

   214     /**

   215      * Same as <code>getEncoding(DataSource)</code> except that instead

   216      * of reading the data from an <code>InputStream</code> it uses the

   217      * <code>writeTo</code> method to examine the data.  This is more

   218      * efficient in the common case of a <code>DataHandler</code>

   219      * created with an object and a MIME type (for example, a

   220      * "text/plain" String) because all the I/O is done in this

   221      * thread.  In the case requiring an <code>InputStream</code> the

   222      * <code>DataHandler</code> uses a thread, a pair of pipe streams,

   223      * and the <code>writeTo</code> method to produce the data. <p>

   224      *

   225      * @since   JavaMail 1.2

   226      */

   227     public static String getEncoding(DataHandler dh) {

   228         ContentType cType = null;

   229         String encoding = null;

   231         /*

   232          * Try to pick the most efficient means of determining the

   233          * encoding.  If this DataHandler was created using a DataSource,

   234          * the getEncoding(DataSource) method is typically faster.  If

   235          * the DataHandler was created with an object, this method is

   236          * much faster.  To distinguish the two cases, we use a heuristic.

   237          * A DataHandler created with an object will always have a null name.

   238          * A DataHandler created with a DataSource will usually have a

   239          * non-null name.

   240          *

   241          * XXX - This is actually quite a disgusting hack, but it makes

   242          *       a common case run over twice as fast.

   243          */

   244         if (dh.getName() != null)

   245             return getEncoding(dh.getDataSource());

   247         try {

   248             cType = new ContentType(dh.getContentType());

   249         } catch (Exception ex) {

   250             return "base64"; // what else ?!

   251         }

   253         if (cType.match("text/*")) {

   254             // Check all of the available bytes

   255             AsciiOutputStream aos = new AsciiOutputStream(false, false);

   256             try {

   257                 dh.writeTo(aos);

   258             } catch (IOException ex) { }        // ignore it

   259             switch (aos.getAscii()) {

   260             case ALL_ASCII:

   261                 encoding = "7bit"; // all ascii

   262                 break;

   263             case MOSTLY_ASCII:

   264                 encoding = "quoted-printable"; // mostly ascii

   265                 break;

   266             default:

   267                 encoding = "base64"; // mostly binary

   268                 break;

   269             }

   270         } else { // not "text"

   271             // Check all of available bytes, break out if we find

   272             // at least one non-US-ASCII character

   273             AsciiOutputStream aos =

   274                         new AsciiOutputStream(true, encodeEolStrict);

   275             try {

   276                 dh.writeTo(aos);

   277             } catch (IOException ex) { }        // ignore it

   278             if (aos.getAscii() == ALL_ASCII) // all ascii

   279                 encoding = "7bit";

   280             else // found atleast one non-ascii character, use b64

   281                 encoding = "base64";

   282         }

   284         return encoding;

   285     }

   287     /**

   288      * Decode the given input stream. The Input stream returned is

   289      * the decoded input stream. All the encodings defined in RFC 2045

   290      * are supported here. They include "base64", "quoted-printable",

   291      * "7bit", "8bit", and "binary". In addition, "uuencode" is also

   292      * supported.

   293      *

   294      * @param   is              input stream

   295      * @param   encoding        the encoding of the stream.

   296      * @return                  decoded input stream.

   297      */

   298     public static InputStream decode(InputStream is, String encoding)

   299                 throws MessagingException {

   300         if (encoding.equalsIgnoreCase("base64"))

   301             return new BASE64DecoderStream(is);

   302         else if (encoding.equalsIgnoreCase("quoted-printable"))

   303             return new QPDecoderStream(is);

   304         else if (encoding.equalsIgnoreCase("uuencode") ||

   305                  encoding.equalsIgnoreCase("x-uuencode") ||

   306                  encoding.equalsIgnoreCase("x-uue"))

   307             return new UUDecoderStream(is);

   308         else if (encoding.equalsIgnoreCase("binary") ||

   309                  encoding.equalsIgnoreCase("7bit") ||

   310                  encoding.equalsIgnoreCase("8bit"))

   311             return is;

   312         else

   313             throw new MessagingException("Unknown encoding: " + encoding);

   314     }

   316     /**

   317      * Wrap an encoder around the given output stream.

   318      * All the encodings defined in RFC 2045 are supported here.

   319      * They include "base64", "quoted-printable", "7bit", "8bit" and

   320      * "binary". In addition, "uuencode" is also supported.

   321      *

   322      * @param   os              output stream

   323      * @param   encoding        the encoding of the stream.

   324      * @return                  output stream that applies the

   325      *                          specified encoding.

   326      */

   327     public static OutputStream encode(OutputStream os, String encoding)

   328                 throws MessagingException {

   329         if (encoding == null)

   330             return os;

   331         else if (encoding.equalsIgnoreCase("base64"))

   332             return new BASE64EncoderStream(os);

   333         else if (encoding.equalsIgnoreCase("quoted-printable"))

   334             return new QPEncoderStream(os);

   335         else if (encoding.equalsIgnoreCase("uuencode") ||

   336                  encoding.equalsIgnoreCase("x-uuencode") ||

   337                  encoding.equalsIgnoreCase("x-uue"))

   338             return new UUEncoderStream(os);

   339         else if (encoding.equalsIgnoreCase("binary") ||

   340                  encoding.equalsIgnoreCase("7bit") ||

   341                  encoding.equalsIgnoreCase("8bit"))

   342             return os;

   343         else

   344             throw new MessagingException("Unknown encoding: " +encoding);

   345     }

   347     /**

   348      * Wrap an encoder around the given output stream.

   349      * All the encodings defined in RFC 2045 are supported here.

   350      * They include "base64", "quoted-printable", "7bit", "8bit" and

   351      * "binary". In addition, "uuencode" is also supported.

   352      * The <code>filename</code> parameter is used with the "uuencode"

   353      * encoding and is included in the encoded output.

   354      *

   355      * @param   os              output stream

   356      * @param   encoding        the encoding of the stream.

   357      * @param   filename        name for the file being encoded (only used

   358      *                          with uuencode)

   359      * @return                  output stream that applies the

   360      *                          specified encoding.

   361      * @since                   JavaMail 1.2

   362      */

   363     public static OutputStream encode(OutputStream os, String encoding,

   364                                       String filename)

   365                 throws MessagingException {

   366         if (encoding == null)

   367             return os;

   368         else if (encoding.equalsIgnoreCase("base64"))

   369             return new BASE64EncoderStream(os);

   370         else if (encoding.equalsIgnoreCase("quoted-printable"))

   371             return new QPEncoderStream(os);

   372         else if (encoding.equalsIgnoreCase("uuencode") ||

   373                  encoding.equalsIgnoreCase("x-uuencode") ||

   374                  encoding.equalsIgnoreCase("x-uue"))

   375             return new UUEncoderStream(os, filename);

   376         else if (encoding.equalsIgnoreCase("binary") ||

   377                  encoding.equalsIgnoreCase("7bit") ||

   378                  encoding.equalsIgnoreCase("8bit"))

   379             return os;

   380         else

   381             throw new MessagingException("Unknown encoding: " +encoding);

   382     }

   384     /**

   385      * Encode a RFC 822 "text" token into mail-safe form as per

   386      * RFC 2047. <p>

   387      *

   388      * The given Unicode string is examined for non US-ASCII

   389      * characters. If the string contains only US-ASCII characters,

   390      * it is returned as-is.  If the string contains non US-ASCII

   391      * characters, it is first character-encoded using the platform's

   392      * default charset, then transfer-encoded using either the B or

   393      * Q encoding. The resulting bytes are then returned as a Unicode

   394      * string containing only ASCII  characters. <p>

   395      *

   396      * Note that this method should be used to encode only

   397      * "unstructured" RFC 822 headers. <p>

   398      *

   399      * Example of usage:

   400      * <p><blockquote><pre>

   401      *

   402      *  MimeBodyPart part = ...

   403      *  String rawvalue = "FooBar Mailer, Japanese version 1.1"

   404      *  try {

   405      *    // If we know for sure that rawvalue contains only US-ASCII

   406      *    // characters, we can skip the encoding part

   407      *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));

   408      *  } catch (UnsupportedEncodingException e) {

   409      *    // encoding failure

   410      *  } catch (MessagingException me) {

   411      *   // setHeader() failure

   412      *  }

   413      *

   414      * </pre></blockquote><p>

   415      *

   416      * @param   text    unicode string

   417      * @return  Unicode string containing only US-ASCII characters

   418      * @exception UnsupportedEncodingException if the encoding fails

   419      */

   420     public static String encodeText(String text)

   421                         throws UnsupportedEncodingException {

   422         return encodeText(text, null, null);

   423     }

   425     /**

   426      * Encode a RFC 822 "text" token into mail-safe form as per

   427      * RFC 2047. <p>

   428      *

   429      * The given Unicode string is examined for non US-ASCII

   430      * characters. If the string contains only US-ASCII characters,

   431      * it is returned as-is.  If the string contains non US-ASCII

   432      * characters, it is first character-encoded using the specified

   433      * charset, then transfer-encoded using either the B or Q encoding.

   434      * The resulting bytes are then returned as a Unicode string

   435      * containing only ASCII characters. <p>

   436      *

   437      * Note that this method should be used to encode only

   438      * "unstructured" RFC 822 headers.

   439      *

   440      * @param   text    the header value

   441      * @param   charset the charset. If this parameter is null, the

   442      *          platform's default chatset is used.

   443      * @param   encoding the encoding to be used. Currently supported

   444      *          values are "B" and "Q". If this parameter is null, then

   445      *          the "Q" encoding is used if most of characters to be

   446      *          encoded are in the ASCII charset, otherwise "B" encoding

   447      *          is used.

   448      * @return  Unicode string containing only US-ASCII characters

   449      */

   450     public static String encodeText(String text, String charset,

   451                                     String encoding)

   452                         throws UnsupportedEncodingException {

   453         return encodeWord(text, charset, encoding, false);

   454     }

   456     /**

   457      * Decode "unstructured" headers, that is, headers that are defined

   458      * as '*text' as per RFC 822. <p>

   459      *

   460      * The string is decoded using the algorithm specified in

   461      * RFC 2047, Section 6.1.1. If the charset-conversion fails

   462      * for any sequence, an UnsupportedEncodingException is thrown.

   463      * If the String is not an RFC 2047 style encoded header, it is

   464      * returned as-is <p>

   465      *

   466      * Example of usage:

   467      * <p><blockquote><pre>

   468      *

   469      *  MimeBodyPart part = ...

   470      *  String rawvalue = null;

   471      *  String  value = null;

   472      *  try {

   473      *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)

   474      *      value = MimeUtility.decodeText(rawvalue);

   475      *  } catch (UnsupportedEncodingException e) {

   476      *      // Don't care

   477      *      value = rawvalue;

   478      *  } catch (MessagingException me) { }

   479      *

   480      *  return value;

   481      *

   482      * </pre></blockquote><p>

   483      *

   484      * @param   etext   the possibly encoded value

   485      * @exception       UnsupportedEncodingException if the charset

   486      *                  conversion failed.

   487      */

   488     public static String decodeText(String etext)

   489                 throws UnsupportedEncodingException {

   490         /*

   491          * We look for sequences separated by "linear-white-space".

   492          * (as per RFC 2047, Section 6.1.1)

   493          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.

   494          */

   495         String lwsp = " \t\n\r";

   496         StringTokenizer st;

   498         /*

   499          * First, lets do a quick run thru the string and check

   500          * whether the sequence "=?"  exists at all. If none exists,

   501          * we know there are no encoded-words in here and we can just

   502          * return the string as-is, without suffering thru the later

   503          * decoding logic.

   504          * This handles the most common case of unencoded headers

   505          * efficiently.

   506          */

   507         if (etext.indexOf("=?") == -1)

   508             return etext;

   510         // Encoded words found. Start decoding ...

   512         st = new StringTokenizer(etext, lwsp, true);

   513         StringBuffer sb = new StringBuffer();  // decode buffer

   514         StringBuffer wsb = new StringBuffer(); // white space buffer

   515         boolean prevWasEncoded = false;

   517         while (st.hasMoreTokens()) {

   518             char c;

   519             String s = st.nextToken();

   520             // If whitespace, append it to the whitespace buffer

   521             if (((c = s.charAt(0)) == ' ') || (c == '\t') ||

   522                 (c == '\r') || (c == '\n'))

   523                 wsb.append(c);

   524             else {

   525                 // Check if token is an 'encoded-word' ..

   526                 String word;

   527                 try {

   528                     word = decodeWord(s);

   529                     // Yes, this IS an 'encoded-word'.

   530                     if (!prevWasEncoded && wsb.length() > 0) {

   531                         // if the previous word was also encoded, we

   532                         // should ignore the collected whitespace. Else

   533                         // we include the whitespace as well.

   534                         sb.append(wsb);

   535                     }

   536                     prevWasEncoded = true;

   537                 } catch (ParseException pex) {

   538                     // This is NOT an 'encoded-word'.

   539                     word = s;

   540                     // possibly decode inner encoded words

   541                     if (!decodeStrict)

   542                         word = decodeInnerWords(word);

   543                     // include colleced whitespace ..

   544                     if (wsb.length() > 0)

   545                         sb.append(wsb);

   546                     prevWasEncoded = false;

   547                 }

   548                 sb.append(word); // append the actual word

   549                 wsb.setLength(0); // reset wsb for reuse

   550             }

   551         }

   552         return sb.toString();

   553     }

   555     /**

   556      * Encode a RFC 822 "word" token into mail-safe form as per

   557      * RFC 2047. <p>

   558      *

   559      * The given Unicode string is examined for non US-ASCII

   560      * characters. If the string contains only US-ASCII characters,

   561      * it is returned as-is.  If the string contains non US-ASCII

   562      * characters, it is first character-encoded using the platform's

   563      * default charset, then transfer-encoded using either the B or

   564      * Q encoding. The resulting bytes are then returned as a Unicode

   565      * string containing only ASCII  characters. <p>

   566      *

   567      * This method is meant to be used when creating RFC 822 "phrases".

   568      * The InternetAddress class, for example, uses this to encode

   569      * it's 'phrase' component.

   570      *

   571      * @param   text    unicode string

   572      * @return  Array of Unicode strings containing only US-ASCII

   573      *          characters.

   574      * @exception UnsupportedEncodingException if the encoding fails

   575      */

   576     public static String encodeWord(String word)

   577                         throws UnsupportedEncodingException {

   578         return encodeWord(word, null, null);

   579     }

   581     /**

   582      * Encode a RFC 822 "word" token into mail-safe form as per

   583      * RFC 2047. <p>

   584      *

   585      * The given Unicode string is examined for non US-ASCII

   586      * characters. If the string contains only US-ASCII characters,

   587      * it is returned as-is.  If the string contains non US-ASCII

   588      * characters, it is first character-encoded using the specified

   589      * charset, then transfer-encoded using either the B or Q encoding.

   590      * The resulting bytes are then returned as a Unicode string

   591      * containing only ASCII characters. <p>

   592      *

   593      * @param   text    unicode string

   594      * @param   charset the MIME charset

   595      * @param   encoding the encoding to be used. Currently supported

   596      *          values are "B" and "Q". If this parameter is null, then

   597      *          the "Q" encoding is used if most of characters to be

   598      *          encoded are in the ASCII charset, otherwise "B" encoding

   599      *          is used.

   600      * @return  Unicode string containing only US-ASCII characters

   601      * @exception UnsupportedEncodingException if the encoding fails

   602      */

   603     public static String encodeWord(String word, String charset,

   604                                     String encoding)

   605                         throws UnsupportedEncodingException {

   606         return encodeWord(word, charset, encoding, true);

   607     }

   609     /*

   610      * Encode the given string. The parameter 'encodingWord' should

   611      * be true if a RFC 822 "word" token is being encoded and false if a

   612      * RFC 822 "text" token is being encoded. This is because the

   613      * "Q" encoding defined in RFC 2047 has more restrictions when

   614      * encoding "word" tokens. (Sigh)

   615      */

   616     private static String encodeWord(String string, String charset,

   617                                      String encoding, boolean encodingWord)

   618                         throws UnsupportedEncodingException {

   620         // If 'string' contains only US-ASCII characters, just

   621         // return it.

   622         int ascii = checkAscii(string);

   623         if (ascii == ALL_ASCII)

   624             return string;

   626         // Else, apply the specified charset conversion.

   627         String jcharset;

   628         if (charset == null) { // use default charset

   629             jcharset = getDefaultJavaCharset(); // the java charset

   630             charset = getDefaultMIMECharset(); // the MIME equivalent

   631         } else // MIME charset -> java charset

   632             jcharset = javaCharset(charset);

   634         // If no transfer-encoding is specified, figure one out.

   635         if (encoding == null) {

   636             if (ascii != MOSTLY_NONASCII)

   637                 encoding = "Q";

   638             else

   639                 encoding = "B";

   640         }

   642         boolean b64;

   643         if (encoding.equalsIgnoreCase("B"))

   644             b64 = true;

   645         else if (encoding.equalsIgnoreCase("Q"))

   646             b64 = false;

   647         else

   648             throw new UnsupportedEncodingException(

   649                         "Unknown transfer encoding: " + encoding);

   651         StringBuffer outb = new StringBuffer(); // the output buffer

   652         doEncode(string, b64, jcharset,

   653                  // As per RFC 2047, size of an encoded string should not

   654                  // exceed 75 bytes.

   655                  // 7 = size of "=?", '?', 'B'/'Q', '?', "?="

   656                  75 - 7 - charset.length(), // the available space

   657                  "=?" + charset + "?" + encoding + "?", // prefix

   658                  true, encodingWord, outb);

   660         return outb.toString();

   661     }

   663     private static void doEncode(String string, boolean b64,

   664                 String jcharset, int avail, String prefix,

   665                 boolean first, boolean encodingWord, StringBuffer buf)

   666                         throws UnsupportedEncodingException {

   668         // First find out what the length of the encoded version of

   669         // 'string' would be.

   670         byte[] bytes = string.getBytes(jcharset);

   671         int len;

   672         if (b64) // "B" encoding

   673             len = BEncoderStream.encodedLength(bytes);

   674         else // "Q"

   675             len = QEncoderStream.encodedLength(bytes, encodingWord);

   677         int size;

   678         if ((len > avail) && ((size = string.length()) > 1)) {

   679             // If the length is greater than 'avail', split 'string'

   680             // into two and recurse.

   681             doEncode(string.substring(0, size/2), b64, jcharset,

   682                      avail, prefix, first, encodingWord, buf);

   683             doEncode(string.substring(size/2, size), b64, jcharset,

   684                      avail, prefix, false, encodingWord, buf);

   685         } else {

   686             // length <= than 'avail'. Encode the given string

   687             ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);

   688             OutputStream eos; // the encoder

   689             if (b64) // "B" encoding

   690                 eos = new BEncoderStream(os);

   691             else // "Q" encoding

   692                 eos = new QEncoderStream(os, encodingWord);

   694             try { // do the encoding

   695                 eos.write(bytes);

   696                 eos.close();

   697             } catch (IOException ioex) { }

   699             byte[] encodedBytes = os.toByteArray(); // the encoded stuff

   700             // Now write out the encoded (all ASCII) bytes into our

   701             // StringBuffer

   702             if (!first) // not the first line of this sequence

   703                 if (foldEncodedWords)

   704                     buf.append("\r\n "); // start a continuation line

   705                 else

   706                     buf.append(" "); // line will be folded later

   708             buf.append(prefix);

   709             for (int i = 0; i < encodedBytes.length; i++)

   710                 buf.append((char)encodedBytes[i]);

   711             buf.append("?="); // terminate the current sequence

   712         }

   713     }

   715     /**

   716      * The string is parsed using the rules in RFC 2047 for parsing

   717      * an "encoded-word". If the parse fails, a ParseException is

   718      * thrown. Otherwise, it is transfer-decoded, and then

   719      * charset-converted into Unicode. If the charset-conversion

   720      * fails, an UnsupportedEncodingException is thrown.<p>

   721      *

   722      * @param   eword   the possibly encoded value

   723      * @exception       ParseException if the string is not an

   724      *                  encoded-word as per RFC 2047.

   725      * @exception       UnsupportedEncodingException if the charset

   726      *                  conversion failed.

   727      */

   728     public static String decodeWord(String eword)

   729                 throws ParseException, UnsupportedEncodingException {

   731         if (!eword.startsWith("=?")) // not an encoded word

   732             throw new ParseException();

   734         // get charset

   735         int start = 2; int pos;

   736         if ((pos = eword.indexOf('?', start)) == -1)

   737             throw new ParseException();

   738         String charset = javaCharset(eword.substring(start, pos));

   740         // get encoding

   741         start = pos+1;

   742         if ((pos = eword.indexOf('?', start)) == -1)

   743             throw new ParseException();

   744         String encoding = eword.substring(start, pos);

   746         // get encoded-sequence

   747         start = pos+1;

   748         if ((pos = eword.indexOf("?=", start)) == -1)

   749             throw new ParseException();

   750         String word = eword.substring(start, pos);

   752         try {

   753             // Extract the bytes from word

   754             ByteArrayInputStream bis =

   755                 new ByteArrayInputStream(ASCIIUtility.getBytes(word));

   757             // Get the appropriate decoder

   758             InputStream is;

   759             if (encoding.equalsIgnoreCase("B"))

   760                 is = new BASE64DecoderStream(bis);

   761             else if (encoding.equalsIgnoreCase("Q"))

   762                 is = new QDecoderStream(bis);

   763             else

   764                 throw new UnsupportedEncodingException(

   765                                 "unknown encoding: " + encoding);

   767             // For b64 & q, size of decoded word <= size of word. So

   768             // the decoded bytes must fit into the 'bytes' array. This

   769             // is certainly more efficient than writing bytes into a

   770             // ByteArrayOutputStream and then pulling out the byte[]

   771             // from it.

   772             int count = bis.available();

   773             byte[] bytes = new byte[count];

   774             // count is set to the actual number of decoded bytes

   775             count = is.read(bytes, 0, count);

   777             // Finally, convert the decoded bytes into a String using

   778             // the specified charset

   779             String s = new String(bytes, 0, count, charset);

   780             if (pos + 2 < eword.length()) {

   781                 // there's still more text in the string

   782                 String rest = eword.substring(pos + 2);

   783                 if (!decodeStrict)

   784                     rest = decodeInnerWords(rest);

   785                 s += rest;

   786             }

   787             return s;

   788         } catch (UnsupportedEncodingException uex) {

   789             // explicitly catch and rethrow this exception, otherwise

   790             // the below IOException catch will swallow this up!

   791             throw uex;

   792         } catch (IOException ioex) {

   793             // Shouldn't happen.

   794             throw new ParseException();

   795         } catch (IllegalArgumentException iex) {

   796             /* An unknown charset of the form ISO-XXX-XXX, will cause

   797              * the JDK to throw an IllegalArgumentException ... Since the

   798              * JDK will attempt to create a classname using this string,

   799              * but valid classnames must not contain the character '-',

   800              * and this results in an IllegalArgumentException, rather than

   801              * the expected UnsupportedEncodingException. Yikes

   802              */

   803             throw new UnsupportedEncodingException();

   804         }

   805     }

   807     /**

   808      * Look for encoded words within a word.  The MIME spec doesn't

   809      * allow this, but many broken mailers, especially Japanese mailers,

   810      * produce such incorrect encodings.

   811      */

   812     private static String decodeInnerWords(String word)

   813                                 throws UnsupportedEncodingException {

   814         int start = 0, i;

   815         StringBuffer buf = new StringBuffer();

   816         while ((i = word.indexOf("=?", start)) >= 0) {

   817             buf.append(word.substring(start, i));

   818             int end = word.indexOf("?=", i);

   819             if (end < 0)

   820                 break;

   821             String s = word.substring(i, end + 2);

   822             try {

   823                 s = decodeWord(s);

   824             } catch (ParseException pex) {

   825                 // ignore it, just use the original string

   826             }

   827             buf.append(s);

   828             start = end + 2;

   829         }

   830         if (start == 0)

   831             return word;

   832         if (start < word.length())

   833             buf.append(word.substring(start));

   834         return buf.toString();

   835     }

   837     /**

   838      * A utility method to quote a word, if the word contains any

   839      * characters from the specified 'specials' list.<p>

   840      *

   841      * The <code>HeaderTokenizer</code> class defines two special

   842      * sets of delimiters - MIME and RFC 822. <p>

   843      *

   844      * This method is typically used during the generation of

   845      * RFC 822 and MIME header fields.

   846      *

   847      * @param   word    word to be quoted

   848      * @param   specials the set of special characters

   849      * @return          the possibly quoted word

   850      * @see     javax.mail.internet.HeaderTokenizer#MIME

   851      * @see     javax.mail.internet.HeaderTokenizer#RFC822

   852      */

   853     public static String quote(String word, String specials) {

   854         int len = word.length();

   856         /*

   857          * Look for any "bad" characters, Escape and

   858          *  quote the entire string if necessary.

   859          */

   860         boolean needQuoting = false;

   861         for (int i = 0; i < len; i++) {

   862             char c = word.charAt(i);

   863             if (c == '"' || c == '\\' || c == '\r' || c == '\n') {

   864                 // need to escape them and then quote the whole string

   865                 StringBuffer sb = new StringBuffer(len + 3);

   866                 sb.append('"');

   867                 sb.append(word.substring(0, i));

   868                 int lastc = 0;

   869                 for (int j = i; j < len; j++) {

   870                     char cc = word.charAt(j);

   871                     if ((cc == '"') || (cc == '\\') ||

   872                         (cc == '\r') || (cc == '\n'))

   873                         if (cc == '\n' && lastc == '\r')

   874                             ;   // do nothing, CR was already escaped

   875                         else

   876                             sb.append('\\');    // Escape the character

   877                     sb.append(cc);

   878                     lastc = cc;

   879                 }

   880                 sb.append('"');

   881                 return sb.toString();

   882             } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)

   883                 // These characters cause the string to be quoted

   884                 needQuoting = true;

   885         }

   887         if (needQuoting) {

   888             StringBuffer sb = new StringBuffer(len + 2);

   889             sb.append('"').append(word).append('"');

   890             return sb.toString();

   891         } else

   892             return word;

   893     }

   895     /**

   896      * Fold a string at linear whitespace so that each line is no longer

   897      * than 76 characters, if possible.  If there are more than 76

   898      * non-whitespace characters consecutively, the string is folded at

   899      * the first whitespace after that sequence.  The parameter

   900      * <code>used</code> indicates how many characters have been used in

   901      * the current line; it is usually the length of the header name. <p>

   902      *

   903      * Note that line breaks in the string aren't escaped; they probably

   904      * should be.

   905      *

   906      * @param   used    characters used in line so far

   907      * @param   s       the string to fold

   908      * @return          the folded string

   909      */

   910     /*public*/ static String fold(int used, String s) {

   911         if (!foldText)

   912             return s;

   914         int end;

   915         char c;

   916         // Strip trailing spaces

   917         for (end = s.length() - 1; end >= 0; end--) {

   918             c = s.charAt(end);

   919             if (c != ' ' && c != '\t')

   920                 break;

   921         }

   922         if (end != s.length() - 1)

   923             s = s.substring(0, end + 1);

   925         // if the string fits now, just return it

   926         if (used + s.length() <= 76)

   927             return s;

   929         // have to actually fold the string

   930         StringBuffer sb = new StringBuffer(s.length() + 4);

   931         char lastc = 0;

   932         while (used + s.length() > 76) {

   933             int lastspace = -1;

   934             for (int i = 0; i < s.length(); i++) {

   935                 if (lastspace != -1 && used + i > 76)

   936                     break;

   937                 c = s.charAt(i);

   938                 if (c == ' ' || c == '\t')

   939                     if (!(lastc == ' ' || lastc == '\t'))

   940                         lastspace = i;

   941                 lastc = c;

   942             }

   943             if (lastspace == -1) {

   944                 // no space, use the whole thing

   945                 sb.append(s);

   946                 s = "";

   947                 used = 0;

   948                 break;

   949             }

   950             sb.append(s.substring(0, lastspace));

   951             sb.append("\r\n");

   952             lastc = s.charAt(lastspace);

   953             sb.append(lastc);

   954             s = s.substring(lastspace + 1);

   955             used = 1;

   956         }

   957         sb.append(s);

   958         return sb.toString();

   959     }

   961     /**

   962      * Unfold a folded header.  Any line breaks that aren't escaped and

   963      * are followed by whitespace are removed.

   964      *

   965      * @param   s       the string to unfold

   966      * @return          the unfolded string

   967      */

   968     /*public*/ static String unfold(String s) {

   969         if (!foldText)

   970             return s;

   972         StringBuffer sb = null;

   973         int i;

   974         while ((i = indexOfAny(s, "\r\n")) >= 0) {

   975             int start = i;

   976             int l = s.length();

   977             i++;                // skip CR or NL

   978             if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')

   979                 i++;    // skip LF

   980             if (start == 0 || s.charAt(start - 1) != '\\') {

   981                 char c;

   982                 // if next line starts with whitespace, skip all of it

   983                 // XXX - always has to be true?

   984                 if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {

   985                     i++;        // skip whitespace

   986                     while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))

   987                         i++;

   988                     if (sb == null)

   989                         sb = new StringBuffer(s.length());

   990                     if (start != 0) {

   991                         sb.append(s.substring(0, start));

   992                         sb.append(' ');

   993                     }

   994                     s = s.substring(i);

   995                     continue;

   996                 }

   997                 // it's not a continuation line, just leave it in

   998                 if (sb == null)

   999                     sb = new StringBuffer(s.length());

  1000                 sb.append(s.substring(0, i));

  1001                 s = s.substring(i);

  1002             } else {

  1003                 // there's a backslash at "start - 1"

  1004                 // strip it out, but leave in the line break

  1005                 if (sb == null)

  1006                     sb = new StringBuffer(s.length());

  1007                 sb.append(s.substring(0, start - 1));

  1008                 sb.append(s.substring(start, i));

  1009                 s = s.substring(i);

  1010             }

  1011         }

  1012         if (sb != null) {

  1013             sb.append(s);

  1014             return sb.toString();

  1015         } else

  1016             return s;

  1017     }

  1019     /**

  1020      * Return the first index of any of the characters in "any" in "s",

  1021      * or -1 if none are found.

  1022      *

  1023      * This should be a method on String.

  1024      */

  1025     private static int indexOfAny(String s, String any) {

  1026         return indexOfAny(s, any, 0);

  1027     }

  1029     private static int indexOfAny(String s, String any, int start) {

  1030         try {

  1031             int len = s.length();

  1032             for (int i = start; i < len; i++) {

  1033                 if (any.indexOf(s.charAt(i)) >= 0)

  1034                     return i;

  1035             }

  1036             return -1;

  1037         } catch (StringIndexOutOfBoundsException e) {

  1038             return -1;

  1039         }

  1040     }

  1042     /**

  1043      * Convert a MIME charset name into a valid Java charset name. <p>

  1044      *

  1045      * @param charset   the MIME charset name

  1046      * @return  the Java charset equivalent. If a suitable mapping is

  1047      *          not available, the passed in charset is itself returned.

  1048      */

  1049     public static String javaCharset(String charset) {

  1050         if (mime2java == null || charset == null)

  1051             // no mapping table, or charset parameter is null

  1052             return charset;

  1054         String alias = (String)mime2java.get(charset.toLowerCase());

  1055         return alias == null ? charset : alias;

  1056     }

  1058     /**

  1059      * Convert a java charset into its MIME charset name. <p>

  1060      *

  1061      * Note that a future version of JDK (post 1.2) might provide

  1062      * this functionality, in which case, we may deprecate this

  1063      * method then.

  1064      *

  1065      * @param   charset    the JDK charset

  1066      * @return          the MIME/IANA equivalent. If a mapping

  1067      *                  is not possible, the passed in charset itself

  1068      *                  is returned.

  1069      * @since           JavaMail 1.1

  1070      */

  1071     public static String mimeCharset(String charset) {

  1072         if (java2mime == null || charset == null)

  1073             // no mapping table or charset param is null

  1074             return charset;

  1076         String alias = (String)java2mime.get(charset.toLowerCase());

  1077         return alias == null ? charset : alias;

  1078     }

  1080     private static String defaultJavaCharset;

  1081     private static String defaultMIMECharset;

  1083     /**

  1084      * Get the default charset corresponding to the system's current

  1085      * default locale.  If the System property <code>mail.mime.charset</code>

  1086      * is set, a system charset corresponding to this MIME charset will be

  1087      * returned. <p>

  1088      *

  1089      * @return  the default charset of the system's default locale,

  1090      *          as a Java charset. (NOT a MIME charset)

  1091      * @since   JavaMail 1.1

  1092      */

  1093     public static String getDefaultJavaCharset() {

  1094         if (defaultJavaCharset == null) {

  1095             /*

  1096              * If mail.mime.charset is set, it controls the default

  1097              * Java charset as well.

  1098              */

  1099             String mimecs = null;

  1101             mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");

  1103             if (mimecs != null && mimecs.length() > 0) {

  1104                 defaultJavaCharset = javaCharset(mimecs);

  1105                 return defaultJavaCharset;

  1106             }

  1108             try {

  1109                 defaultJavaCharset = System.getProperty("file.encoding",

  1110                                                         "8859_1");

  1111             } catch (SecurityException sex) {

  1113                 class NullInputStream extends InputStream {

  1114                     public int read() {

  1115                         return 0;

  1116                     }

  1117                 }

  1118                 InputStreamReader reader =

  1119                         new InputStreamReader(new NullInputStream());

  1120                 defaultJavaCharset = reader.getEncoding();

  1121                 if (defaultJavaCharset == null)

  1122                     defaultJavaCharset = "8859_1";

  1123             }

  1124         }

  1126         return defaultJavaCharset;

  1127     }

  1129     /*

  1130      * Get the default MIME charset for this locale.

  1131      */

  1132     static String getDefaultMIMECharset() {

  1133         if (defaultMIMECharset == null) {

  1134                 defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");

  1135         }

  1136         if (defaultMIMECharset == null)

  1137             defaultMIMECharset = mimeCharset(getDefaultJavaCharset());

  1138         return defaultMIMECharset;

  1139     }

  1141     // Tables to map MIME charset names to Java names and vice versa.

  1142     // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset

  1143     private static Hashtable mime2java;

  1144     private static Hashtable java2mime;

  1146     static {

  1147         java2mime = new Hashtable(40);

  1148         mime2java = new Hashtable(10);

  1150         try {

  1151             // Use this class's classloader to load the mapping file

  1152             // XXX - we should use SecuritySupport, but it's in another package

  1153             InputStream is =

  1154                     com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(

  1155                     "/META-INF/javamail.charset.map");

  1157             if (is != null) {

  1158                 is = new LineInputStream(is);

  1160                 // Load the JDK-to-MIME charset mapping table

  1161                 loadMappings((LineInputStream)is, java2mime);

  1163                 // Load the MIME-to-JDK charset mapping table

  1164                 loadMappings((LineInputStream)is, mime2java);

  1165             }

  1166         } catch (Exception ex) { }

  1168         // If we didn't load the tables, e.g., because we didn't have

  1169         // permission, load them manually.  The entries here should be

  1170         // the same as the default javamail.charset.map.

  1171         if (java2mime.isEmpty()) {

  1172             java2mime.put("8859_1", "ISO-8859-1");

  1173             java2mime.put("iso8859_1", "ISO-8859-1");

  1174             java2mime.put("ISO8859-1", "ISO-8859-1");

  1176             java2mime.put("8859_2", "ISO-8859-2");

  1177             java2mime.put("iso8859_2", "ISO-8859-2");

  1178             java2mime.put("ISO8859-2", "ISO-8859-2");

  1180             java2mime.put("8859_3", "ISO-8859-3");

  1181             java2mime.put("iso8859_3", "ISO-8859-3");

  1182             java2mime.put("ISO8859-3", "ISO-8859-3");

  1184             java2mime.put("8859_4", "ISO-8859-4");

  1185             java2mime.put("iso8859_4", "ISO-8859-4");

  1186             java2mime.put("ISO8859-4", "ISO-8859-4");

  1188             java2mime.put("8859_5", "ISO-8859-5");

  1189             java2mime.put("iso8859_5", "ISO-8859-5");

  1190             java2mime.put("ISO8859-5", "ISO-8859-5");

  1192             java2mime.put("8859_6", "ISO-8859-6");

  1193             java2mime.put("iso8859_6", "ISO-8859-6");

  1194             java2mime.put("ISO8859-6", "ISO-8859-6");

  1196             java2mime.put("8859_7", "ISO-8859-7");

  1197             java2mime.put("iso8859_7", "ISO-8859-7");

  1198             java2mime.put("ISO8859-7", "ISO-8859-7");

  1200             java2mime.put("8859_8", "ISO-8859-8");

  1201             java2mime.put("iso8859_8", "ISO-8859-8");

  1202             java2mime.put("ISO8859-8", "ISO-8859-8");

  1204             java2mime.put("8859_9", "ISO-8859-9");

  1205             java2mime.put("iso8859_9", "ISO-8859-9");

  1206             java2mime.put("ISO8859-9", "ISO-8859-9");

  1208             java2mime.put("SJIS", "Shift_JIS");

  1209             java2mime.put("MS932", "Shift_JIS");

  1210             java2mime.put("JIS", "ISO-2022-JP");

  1211             java2mime.put("ISO2022JP", "ISO-2022-JP");

  1212             java2mime.put("EUC_JP", "euc-jp");

  1213             java2mime.put("KOI8_R", "koi8-r");

  1214             java2mime.put("EUC_CN", "euc-cn");

  1215             java2mime.put("EUC_TW", "euc-tw");

  1216             java2mime.put("EUC_KR", "euc-kr");

  1217         }

  1218         if (mime2java.isEmpty()) {

  1219             mime2java.put("iso-2022-cn", "ISO2022CN");

  1220             mime2java.put("iso-2022-kr", "ISO2022KR");

  1221             mime2java.put("utf-8", "UTF8");

  1222             mime2java.put("utf8", "UTF8");

  1223             mime2java.put("ja_jp.iso2022-7", "ISO2022JP");

  1224             mime2java.put("ja_jp.eucjp", "EUCJIS");

  1225             mime2java.put("euc-kr", "KSC5601");

  1226             mime2java.put("euckr", "KSC5601");

  1227             mime2java.put("us-ascii", "ISO-8859-1");

  1228             mime2java.put("x-us-ascii", "ISO-8859-1");

  1229         }

  1230     }

  1232     private static void loadMappings(LineInputStream is, Hashtable table) {

  1233         String currLine;

  1235         while (true) {

  1236             try {

  1237                 currLine = is.readLine();

  1238             } catch (IOException ioex) {

  1239                 break; // error in reading, stop

  1240             }

  1242             if (currLine == null) // end of file, stop

  1243                 break;

  1244             if (currLine.startsWith("--") && currLine.endsWith("--"))

  1245                 // end of this table

  1246                 break;

  1248             // ignore empty lines and comments

  1249             if (currLine.trim().length() == 0 || currLine.startsWith("#"))

  1250                 continue;

  1252             // A valid entry is of the form <key><separator><value>

  1253             // where, <separator> := SPACE | HT. Parse this

  1254             StringTokenizer tk = new StringTokenizer(currLine, " \t");

  1255             try {

  1256                 String key = tk.nextToken();

  1257                 String value = tk.nextToken();

  1258                 table.put(key.toLowerCase(), value);

  1259             } catch (NoSuchElementException nex) { }

  1260         }

  1261     }

    // Classification codes returned by the checkAscii methods.
    static final int ALL_ASCII          = 1; // no non-US-ASCII character was found
    static final int MOSTLY_ASCII       = 2; // US-ASCII characters outnumber the others
    static final int MOSTLY_NONASCII    = 3; // neither of the above

  1267     /**

  1268      * Check if the given string contains non US-ASCII characters.

  1269      * @param   s       string

  1270      * @return          ALL_ASCII if all characters in the string

  1271      *                  belong to the US-ASCII charset. MOSTLY_ASCII

  1272      *                  if more than half of the available characters

  1273      *                  are US-ASCII characters. Else MOSTLY_NONASCII.

  1274      */

  1275     static int checkAscii(String s) {

  1276         int ascii = 0, non_ascii = 0;

  1277         int l = s.length();

  1279         for (int i = 0; i < l; i++) {

  1280             if (nonascii((int)s.charAt(i))) // non-ascii

  1281                 non_ascii++;

  1282             else

  1283                 ascii++;

  1284         }

  1286         if (non_ascii == 0)

  1287             return ALL_ASCII;

  1288         if (ascii > non_ascii)

  1289             return MOSTLY_ASCII;

  1291         return MOSTLY_NONASCII;

  1292     }

  1294     /**

  1295      * Check if the given byte array contains non US-ASCII characters.

  1296      * @param   b       byte array

  1297      * @return          ALL_ASCII if all characters in the string

  1298      *                  belong to the US-ASCII charset. MOSTLY_ASCII

  1299      *                  if more than half of the available characters

  1300      *                  are US-ASCII characters. Else MOSTLY_NONASCII.

  1301      *

  1302      * XXX - this method is no longer used

  1303      */

  1304     static int checkAscii(byte[] b) {

  1305         int ascii = 0, non_ascii = 0;

  1307         for (int i=0; i < b.length; i++) {

  1308             // The '&' operator automatically causes b[i] to be promoted

  1309             // to an int, and we mask out the higher bytes in the int

  1310             // so that the resulting value is not a negative integer.

  1311             if (nonascii(b[i] & 0xff)) // non-ascii

  1312                 non_ascii++;

  1313             else

  1314                 ascii++;

  1315         }

  1317         if (non_ascii == 0)

  1318             return ALL_ASCII;

  1319         if (ascii > non_ascii)

  1320             return MOSTLY_ASCII;

  1322         return MOSTLY_NONASCII;

  1323     }

  1325     /**

  1326      * Check if the given input stream contains non US-ASCII characters.

  1327      * Upto <code>max</code> bytes are checked. If <code>max</code> is

  1328      * set to <code>ALL</code>, then all the bytes available in this

  1329      * input stream are checked. If <code>breakOnNonAscii</code> is true

  1330      * the check terminates when the first non-US-ASCII character is

  1331      * found and MOSTLY_NONASCII is returned. Else, the check continues

  1332      * till <code>max</code> bytes or till the end of stream.

  1333      *

  1334      * @param   is      the input stream

  1335      * @param   max     maximum bytes to check for. The special value

  1336      *                  ALL indicates that all the bytes in this input

  1337      *                  stream must be checked.

  1338      * @param   breakOnNonAscii if <code>true</code>, then terminate the

  1339      *                  the check when the first non-US-ASCII character

  1340      *                  is found.

  1341      * @return          ALL_ASCII if all characters in the string

  1342      *                  belong to the US-ASCII charset. MOSTLY_ASCII

  1343      *                  if more than half of the available characters

  1344      *                  are US-ASCII characters. Else MOSTLY_NONASCII.

  1345      */

  1346     static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {

  1347         int ascii = 0, non_ascii = 0;

  1348         int len;

  1349         int block = 4096;

  1350         int linelen = 0;

  1351         boolean longLine = false, badEOL = false;

  1352         boolean checkEOL = encodeEolStrict && breakOnNonAscii;

  1353         byte buf[] = null;

  1354         if (max != 0) {

  1355             block = (max == ALL) ? 4096 : Math.min(max, 4096);

  1356             buf = new byte[block];

  1357         }

  1358         while (max != 0) {

  1359             try {

  1360                 if ((len = is.read(buf, 0, block)) == -1)

  1361                     break;

  1362                 int lastb = 0;

  1363                 for (int i = 0; i < len; i++) {

  1364                     // The '&' operator automatically causes b[i] to

  1365                     // be promoted to an int, and we mask out the higher

  1366                     // bytes in the int so that the resulting value is

  1367                     // not a negative integer.

  1368                     int b = buf[i] & 0xff;

  1369                     if (checkEOL &&

  1370                             ((lastb == '\r' && b != '\n') ||

  1371                             (lastb != '\r' && b == '\n')))

  1372                         badEOL = true;

  1373                     if (b == '\r' || b == '\n')

  1374                         linelen = 0;

  1375                     else {

  1376                         linelen++;

  1377                         if (linelen > 998)      // 1000 - CRLF

  1378                             longLine = true;

  1379                     }

  1380                     if (nonascii(b)) {  // non-ascii

  1381                         if (breakOnNonAscii) // we are done

  1382                             return MOSTLY_NONASCII;

  1383                         else

  1384                             non_ascii++;

  1385                     } else

  1386                         ascii++;

  1387                     lastb = b;

  1388                 }

  1389             } catch (IOException ioex) {

  1390                 break;

  1391             }

  1392             if (max != ALL)

  1393                 max -= len;

  1394         }

  1396         if (max == 0 && breakOnNonAscii)

  1397             // We have been told to break on the first non-ascii character.

  1398             // We haven't got any non-ascii character yet, but then we

  1399             // have not checked all of the available bytes either. So we

  1400             // cannot say for sure that this input stream is ALL_ASCII,

  1401             // and hence we must play safe and return MOSTLY_NONASCII

  1403             return MOSTLY_NONASCII;

  1405         if (non_ascii == 0) { // no non-us-ascii characters so far

  1406             // If we're looking at non-text data, and we saw CR without LF

  1407             // or vice versa, consider this mostly non-ASCII so that it

  1408             // will be base64 encoded (since the quoted-printable encoder

  1409             // doesn't encode this case properly).

  1410             if (badEOL)

  1411                 return MOSTLY_NONASCII;

  1412             // if we've seen a long line, we degrade to mostly ascii

  1413             else if (longLine)

  1414                 return MOSTLY_ASCII;

  1415             else

  1416                 return ALL_ASCII;

  1417         }

  1418         if (ascii > non_ascii) // mostly ascii

  1419             return MOSTLY_ASCII;

  1420         return MOSTLY_NONASCII;

  1421     }

  1423     static final boolean nonascii(int b) {

  1424         return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');

  1425     }

  1426 }

  1428 /**

  1429  * An OutputStream that determines whether the data written to

  1430  * it is all ASCII, mostly ASCII, or mostly non-ASCII.

  1431  */

  1432 class AsciiOutputStream extends OutputStream {

  1433     private boolean breakOnNonAscii;

  1434     private int ascii = 0, non_ascii = 0;

  1435     private int linelen = 0;

  1436     private boolean longLine = false;

  1437     private boolean badEOL = false;

  1438     private boolean checkEOL = false;

  1439     private int lastb = 0;

  1440     private int ret = 0;

  1442     public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {

  1443         this.breakOnNonAscii = breakOnNonAscii;

  1444         checkEOL = encodeEolStrict && breakOnNonAscii;

  1445     }

  1447     public void write(int b) throws IOException {

  1448         check(b);

  1449     }

  1451     public void write(byte b[]) throws IOException {

  1452         write(b, 0, b.length);

  1453     }

  1455     public void write(byte b[], int off, int len) throws IOException {

  1456         len += off;

  1457         for (int i = off; i < len ; i++)

  1458             check(b[i]);

  1459     }

  1461     private final void check(int b) throws IOException {

  1462         b &= 0xff;

  1463         if (checkEOL &&

  1464                 ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))

  1465             badEOL = true;

  1466         if (b == '\r' || b == '\n')

  1467             linelen = 0;

  1468         else {

  1469             linelen++;

  1470             if (linelen > 998)  // 1000 - CRLF

  1471                 longLine = true;

  1472         }

  1473         if (MimeUtility.nonascii(b)) { // non-ascii

  1474             non_ascii++;

  1475             if (breakOnNonAscii) {      // we are done

  1476                 ret = MimeUtility.MOSTLY_NONASCII;

  1477                 throw new EOFException();

  1478             }

  1479         } else

  1480             ascii++;

  1481         lastb = b;

  1482     }

  1484     /**

  1485      * Return ASCII-ness of data stream.

  1486      */

  1487     public int getAscii() {

  1488         if (ret != 0)

  1489             return ret;

  1490         // If we're looking at non-text data, and we saw CR without LF

  1491         // or vice versa, consider this mostly non-ASCII so that it

  1492         // will be base64 encoded (since the quoted-printable encoder

  1493         // doesn't encode this case properly).

  1494         if (badEOL)

  1495             return MimeUtility.MOSTLY_NONASCII;

  1496         else if (non_ascii == 0) { // no non-us-ascii characters so far

  1497             // if we've seen a long line, we degrade to mostly ascii

  1498             if (longLine)

  1499                 return MimeUtility.MOSTLY_ASCII;

  1500             else

  1501                 return MimeUtility.ALL_ASCII;

  1502         }

  1503         if (ascii > non_ascii) // mostly ascii

  1504             return MimeUtility.MOSTLY_ASCII;

  1505         return MimeUtility.MOSTLY_NONASCII;

  1506     }

  1507 }

Mercurial > jdk8-mips64-public > jaxws / file revision

src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java@9c07ef4934dd

src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java