src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java	Wed Apr 27 01:27:09 2016 +0800
     1.3 @@ -0,0 +1,1507 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +/*
    1.30 + * @(#)MimeUtility.java       1.45 03/03/10
    1.31 + */
    1.32 +
    1.33 +
    1.34 +
    1.35 +package com.sun.xml.internal.messaging.saaj.packaging.mime.internet;
    1.36 +
    1.37 +import java.io.*;
    1.38 +import java.util.*;
    1.39 +
    1.40 +import javax.activation.DataHandler;
    1.41 +import javax.activation.DataSource;
    1.42 +
    1.43 +import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
    1.44 +import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*;
    1.45 +import com.sun.xml.internal.messaging.saaj.util.SAAJUtil;
    1.46 +
    1.47 +/**
    1.48 + * This is a utility class that provides various MIME related
    1.49 + * functionality. <p>
    1.50 + *
    1.51 + * There are a set of methods to encode and decode MIME headers as
    1.52 + * per RFC 2047. A brief description on handling such headers is
    1.53 + * given below: <p>
    1.54 + *
    1.55 + * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
    1.56 + * characters. Headers that contain non US-ASCII characters must be
    1.57 + * encoded so that they contain only US-ASCII characters. Basically,
    1.58 + * this process involves using either BASE64 or QP to encode certain
    1.59 + * characters. RFC 2047 describes this in detail. <p>
    1.60 + *
    1.61 + * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
    1.62 + * subset of Unicode (and occupies the range 0 - 127). A String
    1.63 + * that contains only ASCII characters is already mail-safe. If the
    1.64 + * String contains non US-ASCII characters, it must be encoded. An
    1.65 + * additional complexity in this step is that since Unicode is not
    1.66 + * yet a widely used charset, one might want to first charset-encode
    1.67 + * the String into another charset and then do the transfer-encoding.
    1.68 + * <p>
    1.69 + * Note that to get the actual bytes of a mail-safe String (say,
    1.70 + * for sending over SMTP), one must do
    1.71 + * <p><blockquote><pre>
    1.72 + *
    1.73 + *      byte[] bytes = string.getBytes("iso-8859-1");
    1.74 + *
    1.75 + * </pre></blockquote><p>
    1.76 + *
    1.77 + * The <code>setHeader</code> and <code>addHeader</code> methods
    1.78 + * on MimeMessage and MimeBodyPart assume that the given header values
    1.79 + * are Unicode strings that contain only US-ASCII characters. Hence
    1.80 + * the callers of those methods must ensure that the values they pass
    1.81 + * do not contain non US-ASCII characters. The methods in this class
    1.82 + * help do this. <p>
    1.83 + *
    1.84 + * The <code>getHeader</code> family of methods on MimeMessage and
    1.85 + * MimeBodyPart return the raw header value. These might be encoded
    1.86 + * as per RFC 2047, and if so, must be decoded into Unicode Strings.
    1.87 + * The methods in this class help to do this. <p>
    1.88 + *
    1.89 + * Several System properties control strict conformance to the MIME
    1.90 + * spec.  Note that these are not session properties but must be set
    1.91 + * globally as System properties. <p>
    1.92 + *
    1.93 + * The <code>mail.mime.decodetext.strict</code> property controls
    1.94 + * decoding of MIME encoded words.  The MIME spec requires that encoded
    1.95 + * words start at the beginning of a whitespace separated word.  Some
    1.96 + * mailers incorrectly include encoded words in the middle of a word.
    1.97 + * If the <code>mail.mime.decodetext.strict</code> System property is
    1.98 + * set to <code>"false"</code>, an attempt will be made to decode these
    1.99 + * illegal encoded words. The default is true. <p>
   1.100 + *
   1.101 + * The <code>mail.mime.encodeeol.strict</code> property controls the
   1.102 + * choice of Content-Transfer-Encoding for MIME parts that are not of
   1.103 + * type "text".  Often such parts will contain textual data for which
   1.104 + * an encoding that allows normal end of line conventions is appropriate.
   1.105 + * In rare cases, such a part will appear to contain entirely textual
   1.106 + * data, but will require an encoding that preserves CR and LF characters
   1.107 + * without change.  If the <code>mail.mime.encodeeol.strict</code>
   1.108 + * System property is set to <code>"true"</code>, such an encoding will
   1.109 + * be used when necessary.  The default is false. <p>
   1.110 + *
   1.111 + * In addition, the <code>mail.mime.charset</code> System property can
   1.112 + * be used to specify the default MIME charset to use for encoded words
   1.113 + * and text parts that don't otherwise specify a charset.  Normally, the
   1.114 + * default MIME charset is derived from the default Java charset, as
   1.115 + * specified in the <code>file.encoding</code> System property.  Most
   1.116 + * applications will have no need to explicitly set the default MIME
   1.117 + * charset.  In cases where the default MIME charset to be used for
   1.118 + * mail messages is different than the charset used for files stored on
   1.119 + * the system, this property should be set.
   1.120 + *
   1.121 + * @version 1.45, 03/03/10
   1.122 + * @author  John Mani
   1.123 + * @author  Bill Shannon
   1.124 + */
   1.125 +
   1.126 +public class MimeUtility {
   1.127 +
   1.128 +    // Static utility holder: the private constructor below blocks instantiation
   1.129 +    private MimeUtility() { }
   1.130 +
   1.131 +    public static final int ALL = -1; // handed to checkAscii as its limit argument; presumably "no limit" (TODO confirm)
   1.132 +
   1.133 +    private static final int BUFFER_SIZE = 1024;
   1.134 +    private static boolean decodeStrict = true;      // from mail.mime.decodetext.strict
   1.135 +    private static boolean encodeEolStrict = false;  // from mail.mime.encodeeol.strict
   1.136 +    private static boolean foldEncodedWords = false; // from mail.mime.foldencodedwords
   1.137 +    private static boolean foldText = true;          // from mail.mime.foldtext
   1.138 +    // Read the behavior flags once, at class-load time, from System properties
   1.139 +    static {
   1.140 +        try {
   1.141 +            String s = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict");
   1.142 +            // strict unless the property is explicitly "false" (case-insensitive)
   1.143 +            decodeStrict = s == null || !s.equalsIgnoreCase("false");
   1.144 +            s = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict");
   1.145 +            // off unless the property is explicitly "true" (case-insensitive)
   1.146 +            encodeEolStrict = s != null && s.equalsIgnoreCase("true");
   1.147 +            s = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords");
   1.148 +            // off unless the property is explicitly "true" (case-insensitive)
   1.149 +            foldEncodedWords = s != null && s.equalsIgnoreCase("true");
   1.150 +            s = SAAJUtil.getSystemProperty("mail.mime.foldtext");
   1.151 +            // on unless the property is explicitly "false" (case-insensitive)
   1.152 +            foldText = s == null || !s.equalsIgnoreCase("false");
   1.153 +        } catch (SecurityException sex) {
   1.154 +            // property access denied: flags not yet assigned keep their declared defaults
   1.155 +        }
   1.156 +    }
   1.157 +
   1.158 +
   1.159 +    /**
   1.160 +     * Get the content-transfer-encoding that should be applied
   1.161 +     * to the input stream of this datasource, to make it mailsafe. <p>
   1.162 +     *
   1.163 +     * The algorithm used here is: <br>
   1.164 +     * <ul>
   1.165 +     * <li>
   1.166 +     * If the primary type of this datasource is "text" and if all
   1.167 +     * the bytes in its input stream are US-ASCII, then the encoding
   1.168 +     * is "7bit". If more than half of the bytes are non-US-ASCII, then
   1.169 +     * the encoding is "base64". If less than half of the bytes are
   1.170 +     * non-US-ASCII, then the encoding is "quoted-printable".
   1.171 +     * <li>
   1.172 +     * If the primary type of this datasource is not "text", then if
   1.173 +     * all the bytes of its input stream are US-ASCII, the encoding
   1.174 +     * is "7bit". If there is even one non-US-ASCII character, the
   1.175 +     * encoding is "base64".
   1.176 +     * </ul>
   1.177 +     *
   1.178 +     * @param   ds      DataSource
   1.179 +     * @return          the encoding. This is either "7bit",
   1.180 +     *                  "quoted-printable" or "base64"
   1.181 +     */
   1.182 +    public static String getEncoding(DataSource ds) {
   1.183 +        ContentType contentType;
   1.184 +        InputStream stream;
   1.185 +        try {
   1.186 +            contentType = new ContentType(ds.getContentType());
   1.187 +            stream = ds.getInputStream();
   1.188 +        } catch (Exception ex) {
   1.189 +            // Unparseable type or unreadable data: base64 is safe for any bytes
   1.190 +            return "base64";
   1.191 +        }
   1.192 +
   1.193 +        // Text has to be scanned in full to tell "mostly ASCII" from "mostly
   1.194 +        // binary"; for anything else the first non-ASCII byte settles the
   1.195 +        // answer, so the scan is allowed to stop there.
   1.196 +        boolean stopAtNonAscii = !contentType.match("text/*");
   1.197 +        int verdict = checkAscii(stream, ALL, stopAtNonAscii);
   1.198 +
   1.199 +        String result;
   1.200 +        if (verdict == ALL_ASCII) {
   1.201 +            result = "7bit";
   1.202 +        } else if (verdict == MOSTLY_ASCII) {
   1.203 +            result = "quoted-printable";
   1.204 +        } else {
   1.205 +            result = "base64";
   1.206 +        }
   1.207 +
   1.208 +        // The stream was opened here, so it is released here; a failure
   1.209 +        // while closing cannot change the answer and is ignored.
   1.210 +        try {
   1.211 +            stream.close();
   1.212 +        } catch (IOException ioex) {
   1.213 +        }
   1.214 +        return result;
   1.215 +    }
   1.216 +
   1.217 +    /**
   1.218 +     * Same as <code>getEncoding(DataSource)</code> except that instead
   1.219 +     * of reading the data from an <code>InputStream</code> it uses the
   1.220 +     * <code>writeTo</code> method to examine the data.  This is more
   1.221 +     * efficient in the common case of a <code>DataHandler</code>
   1.222 +     * created with an object and a MIME type (for example, a
   1.223 +     * "text/plain" String) because all the I/O is done in this
   1.224 +     * thread.  In the case requiring an <code>InputStream</code> the
   1.225 +     * <code>DataHandler</code> uses a thread, a pair of pipe streams,
   1.226 +     * and the <code>writeTo</code> method to produce the data. <p>
   1.227 +     *
   1.228 +     * @since   JavaMail 1.2
   1.229 +     */
   1.230 +    public static String getEncoding(DataHandler dh) {
   1.231 +        ContentType cType = null;
   1.232 +        String encoding = null;
   1.233 +
   1.234 +        /*
   1.235 +         * Try to pick the most efficient means of determining the
   1.236 +         * encoding.  If this DataHandler was created using a DataSource,
   1.237 +         * the getEncoding(DataSource) method is typically faster.  If
   1.238 +         * the DataHandler was created with an object, this method is
   1.239 +         * much faster.  To distinguish the two cases, we use a heuristic.
   1.240 +         * A DataHandler created with an object will always have a null name.
   1.241 +         * A DataHandler created with a DataSource will usually have a
   1.242 +         * non-null name.
   1.243 +         *
   1.244 +         * XXX - This is actually quite a disgusting hack, but it makes
   1.245 +         *       a common case run over twice as fast.
   1.246 +         */
   1.247 +        if (dh.getName() != null)
   1.248 +            return getEncoding(dh.getDataSource());
   1.249 +
   1.250 +        try {
   1.251 +            cType = new ContentType(dh.getContentType());
   1.252 +        } catch (Exception ex) {
   1.253 +            return "base64"; // what else ?!
   1.254 +        }
   1.255 +
   1.256 +        if (cType.match("text/*")) {
   1.257 +            // Check all of the available bytes
   1.258 +            AsciiOutputStream aos = new AsciiOutputStream(false, false);
   1.259 +            try {
   1.260 +                dh.writeTo(aos);
   1.261 +            } catch (IOException ex) { }        // ignore it
   1.262 +            switch (aos.getAscii()) {
   1.263 +            case ALL_ASCII:
   1.264 +                encoding = "7bit"; // all ascii
   1.265 +                break;
   1.266 +            case MOSTLY_ASCII:
   1.267 +                encoding = "quoted-printable"; // mostly ascii
   1.268 +                break;
   1.269 +            default:
   1.270 +                encoding = "base64"; // mostly binary
   1.271 +                break;
   1.272 +            }
   1.273 +        } else { // not "text"
   1.274 +            // Check all of available bytes, break out if we find
   1.275 +            // at least one non-US-ASCII character
   1.276 +            AsciiOutputStream aos =
   1.277 +                        new AsciiOutputStream(true, encodeEolStrict);
   1.278 +            try {
   1.279 +                dh.writeTo(aos);
   1.280 +            } catch (IOException ex) { }        // ignore it
   1.281 +            if (aos.getAscii() == ALL_ASCII) // all ascii
   1.282 +                encoding = "7bit";
   1.283 +            else // found atleast one non-ascii character, use b64
   1.284 +                encoding = "base64";
   1.285 +        }
   1.286 +
   1.287 +        return encoding;
   1.288 +    }
   1.289 +
   1.290 +    /**
   1.291 +     * Decode the given input stream. The Input stream returned is
   1.292 +     * the decoded input stream. All the encodings defined in RFC 2045
   1.293 +     * are supported here. They include "base64", "quoted-printable",
   1.294 +     * "7bit", "8bit", and "binary". In addition, "uuencode" is also
   1.295 +     * supported.
   1.296 +     *
   1.297 +     * @param   is              input stream
   1.298 +     * @param   encoding        the encoding of the stream.
   1.299 +     * @return                  decoded input stream.
   1.300 +     */
   1.301 +    public static InputStream decode(InputStream is, String encoding)
   1.302 +                throws MessagingException {
   1.303 +        if (encoding.equalsIgnoreCase("base64"))
   1.304 +            return new BASE64DecoderStream(is);
   1.305 +        else if (encoding.equalsIgnoreCase("quoted-printable"))
   1.306 +            return new QPDecoderStream(is);
   1.307 +        else if (encoding.equalsIgnoreCase("uuencode") ||
   1.308 +                 encoding.equalsIgnoreCase("x-uuencode") ||
   1.309 +                 encoding.equalsIgnoreCase("x-uue"))
   1.310 +            return new UUDecoderStream(is);
   1.311 +        else if (encoding.equalsIgnoreCase("binary") ||
   1.312 +                 encoding.equalsIgnoreCase("7bit") ||
   1.313 +                 encoding.equalsIgnoreCase("8bit"))
   1.314 +            return is;
   1.315 +        else
   1.316 +            throw new MessagingException("Unknown encoding: " + encoding);
   1.317 +    }
   1.318 +
   1.319 +    /**
   1.320 +     * Wrap an encoder around the given output stream.
   1.321 +     * All the encodings defined in RFC 2045 are supported here.
   1.322 +     * They include "base64", "quoted-printable", "7bit", "8bit" and
   1.323 +     * "binary". In addition, "uuencode" is also supported.
   1.324 +     *
   1.325 +     * @param   os              output stream
   1.326 +     * @param   encoding        the encoding of the stream.
   1.327 +     * @return                  output stream that applies the
   1.328 +     *                          specified encoding.
   1.329 +     */
   1.330 +    public static OutputStream encode(OutputStream os, String encoding)
   1.331 +                throws MessagingException {
   1.332 +        if (encoding == null)
   1.333 +            return os;
   1.334 +        else if (encoding.equalsIgnoreCase("base64"))
   1.335 +            return new BASE64EncoderStream(os);
   1.336 +        else if (encoding.equalsIgnoreCase("quoted-printable"))
   1.337 +            return new QPEncoderStream(os);
   1.338 +        else if (encoding.equalsIgnoreCase("uuencode") ||
   1.339 +                 encoding.equalsIgnoreCase("x-uuencode") ||
   1.340 +                 encoding.equalsIgnoreCase("x-uue"))
   1.341 +            return new UUEncoderStream(os);
   1.342 +        else if (encoding.equalsIgnoreCase("binary") ||
   1.343 +                 encoding.equalsIgnoreCase("7bit") ||
   1.344 +                 encoding.equalsIgnoreCase("8bit"))
   1.345 +            return os;
   1.346 +        else
   1.347 +            throw new MessagingException("Unknown encoding: " +encoding);
   1.348 +    }
   1.349 +
   1.350 +    /**
   1.351 +     * Wrap an encoder around the given output stream.
   1.352 +     * All the encodings defined in RFC 2045 are supported here.
   1.353 +     * They include "base64", "quoted-printable", "7bit", "8bit" and
   1.354 +     * "binary". In addition, "uuencode" is also supported.
   1.355 +     * The <code>filename</code> parameter is used with the "uuencode"
   1.356 +     * encoding and is included in the encoded output.
   1.357 +     *
   1.358 +     * @param   os              output stream
   1.359 +     * @param   encoding        the encoding of the stream.
   1.360 +     * @param   filename        name for the file being encoded (only used
   1.361 +     *                          with uuencode)
   1.362 +     * @return                  output stream that applies the
   1.363 +     *                          specified encoding.
   1.364 +     * @since                   JavaMail 1.2
   1.365 +     */
   1.366 +    public static OutputStream encode(OutputStream os, String encoding,
   1.367 +                                      String filename)
   1.368 +                throws MessagingException {
   1.369 +        if (encoding == null)
   1.370 +            return os;
   1.371 +        else if (encoding.equalsIgnoreCase("base64"))
   1.372 +            return new BASE64EncoderStream(os);
   1.373 +        else if (encoding.equalsIgnoreCase("quoted-printable"))
   1.374 +            return new QPEncoderStream(os);
   1.375 +        else if (encoding.equalsIgnoreCase("uuencode") ||
   1.376 +                 encoding.equalsIgnoreCase("x-uuencode") ||
   1.377 +                 encoding.equalsIgnoreCase("x-uue"))
   1.378 +            return new UUEncoderStream(os, filename);
   1.379 +        else if (encoding.equalsIgnoreCase("binary") ||
   1.380 +                 encoding.equalsIgnoreCase("7bit") ||
   1.381 +                 encoding.equalsIgnoreCase("8bit"))
   1.382 +            return os;
   1.383 +        else
   1.384 +            throw new MessagingException("Unknown encoding: " +encoding);
   1.385 +    }
   1.386 +
   1.387 +    /**
   1.388 +     * Encode a RFC 822 "text" token into mail-safe form as per
   1.389 +     * RFC 2047. <p>
   1.390 +     *
   1.391 +     * The given Unicode string is examined for non US-ASCII
   1.392 +     * characters. If the string contains only US-ASCII characters,
   1.393 +     * it is returned as-is.  If the string contains non US-ASCII
   1.394 +     * characters, it is first character-encoded using the platform's
   1.395 +     * default charset, then transfer-encoded using either the B or
   1.396 +     * Q encoding. The resulting bytes are then returned as a Unicode
   1.397 +     * string containing only ASCII  characters. <p>
   1.398 +     *
   1.399 +     * Note that this method should be used to encode only
   1.400 +     * "unstructured" RFC 822 headers. <p>
   1.401 +     *
   1.402 +     * Example of usage:
   1.403 +     * <p><blockquote><pre>
   1.404 +     *
   1.405 +     *  MimeBodyPart part = ...
   1.406 +     *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
   1.407 +     *  try {
   1.408 +     *    // If we know for sure that rawvalue contains only US-ASCII
   1.409 +     *    // characters, we can skip the encoding part
   1.410 +     *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
   1.411 +     *  } catch (UnsupportedEncodingException e) {
   1.412 +     *    // encoding failure
   1.413 +     *  } catch (MessagingException me) {
   1.414 +     *   // setHeader() failure
   1.415 +     *  }
   1.416 +     *
   1.417 +     * </pre></blockquote><p>
   1.418 +     *
   1.419 +     * @param   text    unicode string
   1.420 +     * @return  Unicode string containing only US-ASCII characters
   1.421 +     * @exception UnsupportedEncodingException if the encoding fails
   1.422 +     */
   1.423 +    public static String encodeText(String text) throws UnsupportedEncodingException {
   1.424 +        // null charset and null encoding: the three-argument overload picks both
   1.425 +        return encodeText(text, null, null);
   1.426 +    }
   1.427 +
   1.428 +    /**
   1.429 +     * Encode a RFC 822 "text" token into mail-safe form as per
   1.430 +     * RFC 2047. <p>
   1.431 +     *
   1.432 +     * The given Unicode string is examined for non US-ASCII
   1.433 +     * characters. If the string contains only US-ASCII characters,
   1.434 +     * it is returned as-is.  If the string contains non US-ASCII
   1.435 +     * characters, it is first character-encoded using the specified
   1.436 +     * charset, then transfer-encoded using either the B or Q encoding.
   1.437 +     * The resulting bytes are then returned as a Unicode string
   1.438 +     * containing only ASCII characters. <p>
   1.439 +     *
   1.440 +     * Note that this method should be used to encode only
   1.441 +     * "unstructured" RFC 822 headers.
   1.442 +     *
   1.443 +     * @param   text    the header value
   1.444 +     * @param   charset the charset. If this parameter is null, the
   1.445 +     *          platform's default charset is used.
   1.446 +     * @param   encoding the encoding to be used. Currently supported
   1.447 +     *          values are "B" and "Q". If this parameter is null, then
   1.448 +     *          the "Q" encoding is used if most of characters to be
   1.449 +     *          encoded are in the ASCII charset, otherwise "B" encoding
   1.450 +     *          is used.
   1.451 +     * @return  Unicode string containing only US-ASCII characters
   1.452 +     */
   1.453 +    public static String encodeText(String text, String charset,
   1.454 +                                    String encoding)
   1.455 +                        throws UnsupportedEncodingException {
   1.456 +        return encodeWord(text, charset, encoding, false);
   1.457 +    }
   1.458 +
   1.459 +    /**
   1.460 +     * Decode "unstructured" headers, that is, headers that are defined
   1.461 +     * as '*text' as per RFC 822. <p>
   1.462 +     *
   1.463 +     * The string is decoded using the algorithm specified in
   1.464 +     * RFC 2047, Section 6.1.1. If the charset-conversion fails
   1.465 +     * for any sequence, an UnsupportedEncodingException is thrown.
   1.466 +     * If the String is not an RFC 2047 style encoded header, it is
   1.467 +     * returned as-is <p>
   1.468 +     *
   1.469 +     * Example of usage:
   1.470 +     * <p><blockquote><pre>
   1.471 +     *
   1.472 +     *  MimeBodyPart part = ...
   1.473 +     *  String rawvalue = null;
   1.474 +     *  String  value = null;
   1.475 +     *  try {
   1.476 +     *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
   1.477 +     *      value = MimeUtility.decodeText(rawvalue);
   1.478 +     *  } catch (UnsupportedEncodingException e) {
   1.479 +     *      // Don't care
   1.480 +     *      value = rawvalue;
   1.481 +     *  } catch (MessagingException me) { }
   1.482 +     *
   1.483 +     *  return value;
   1.484 +     *
   1.485 +     * </pre></blockquote><p>
   1.486 +     *
   1.487 +     * @param   etext   the possibly encoded value
   1.488 +     * @exception       UnsupportedEncodingException if the charset
   1.489 +     *                  conversion failed.
   1.490 +     */
   1.491 +    public static String decodeText(String etext)
   1.492 +                throws UnsupportedEncodingException {
   1.493 +        /*
   1.494 +         * Quick pre-check: an encoded-word always starts with "=?".  If
   1.495 +         * that sequence does not occur anywhere, there is nothing to
   1.496 +         * decode and the string is returned untouched.  This keeps the
   1.497 +         * most common case, an unencoded header, cheap.
   1.498 +         */
   1.499 +        if (etext.indexOf("=?") == -1)
   1.500 +            return etext;
   1.501 +
   1.502 +        /*
   1.503 +         * Encoded words may be present.  Split the input at
   1.504 +         * "linear-white-space" (RFC 2047, Section 6.1.1), treated here as
   1.505 +         * SPACE, HT, CR and NL.  The tokenizer is asked to return the
   1.506 +         * delimiters too, so every whitespace character arrives as a
   1.507 +         * one-character token of its own and can be preserved.
   1.508 +         */
   1.509 +        String lwsp = " \t\n\r";
   1.510 +        StringTokenizer st = new StringTokenizer(etext, lwsp, true);
   1.511 +        StringBuffer sb = new StringBuffer();  // decoded result
   1.512 +        StringBuffer wsb = new StringBuffer(); // whitespace seen since the last word
   1.513 +        boolean prevWasEncoded = false;
   1.514 +
   1.515 +        while (st.hasMoreTokens()) {
   1.516 +            char c;
   1.517 +            String s = st.nextToken();
   1.518 +            // Whitespace is not emitted right away: whether it is kept
   1.519 +            // depends on the words on either side of it
   1.520 +            if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
   1.521 +                (c == '\r') || (c == '\n'))
   1.522 +                wsb.append(c);
   1.523 +            else {
   1.524 +                String word;
   1.525 +                try {
   1.526 +                    // Throws ParseException unless the token is an 'encoded-word'
   1.527 +                    word = decodeWord(s);
   1.528 +                    // Whitespace between two adjacent encoded words is
   1.529 +                    // dropped; after a plain word (or at the very
   1.530 +                    // start) it is kept.
   1.531 +                    if (!prevWasEncoded && wsb.length() > 0) {
   1.532 +                        sb.append(wsb);
   1.533 +                    }
   1.534 +                    prevWasEncoded = true;
   1.535 +                } catch (ParseException pex) {
   1.536 +                    // Not an 'encoded-word': keep the token as it is,
   1.537 +                    word = s;
   1.538 +                    // unless lenient mode asks for encoded words embedded
   1.539 +                    // in the middle of it to be decoded as well
   1.540 +                    if (!decodeStrict)
   1.541 +                        word = decodeInnerWords(word);
   1.542 +                    // whitespace in front of a plain word is always kept
   1.543 +                    if (wsb.length() > 0)
   1.544 +                        sb.append(wsb);
   1.545 +                    prevWasEncoded = false;
   1.546 +                }
   1.547 +                sb.append(word); // append the actual word
   1.548 +                wsb.setLength(0); // reset wsb for reuse
   1.549 +            }
   1.550 +        }
   1.551 +
   1.552 +        // Whitespace after the last word has no following word that could
   1.553 +        // have emitted it; flush it here so that it is not silently lost.
   1.554 +        sb.append(wsb);
   1.555 +        return sb.toString();
   1.556 +    }
   1.557 +
   1.558 +    /**
   1.559 +     * Encode a RFC 822 "word" token into mail-safe form as per
   1.560 +     * RFC 2047. <p>
   1.561 +     *
   1.562 +     * The given Unicode string is examined for non US-ASCII
   1.563 +     * characters. If the string contains only US-ASCII characters,
   1.564 +     * it is returned as-is.  If the string contains non US-ASCII
   1.565 +     * characters, it is first character-encoded using the platform's
   1.566 +     * default charset, then transfer-encoded using either the B or
   1.567 +     * Q encoding. The resulting bytes are then returned as a Unicode
   1.568 +     * string containing only ASCII  characters. <p>
   1.569 +     *
   1.570 +     * This method is meant to be used when creating RFC 822 "phrases".
   1.571 +     * The InternetAddress class, for example, uses this to encode
   1.572 +     * its 'phrase' component.
   1.573 +     *
   1.574 +     * @param   word    unicode string
   1.575 +     * @return  Unicode string containing only US-ASCII
   1.576 +     *          characters.
   1.577 +     * @exception UnsupportedEncodingException if the encoding fails
   1.578 +     */
   1.579 +    public static String encodeWord(String word) throws UnsupportedEncodingException {
   1.580 +        // null charset and null encoding: the three-argument overload picks both
   1.581 +        return encodeWord(word, null, null);
   1.582 +    }
   1.583 +
   1.584 +    /**
   1.585 +     * Encode a RFC 822 "word" token into mail-safe form as per
   1.586 +     * RFC 2047. <p>
   1.587 +     *
   1.588 +     * The given Unicode string is examined for non US-ASCII
   1.589 +     * characters. If the string contains only US-ASCII characters,
   1.590 +     * it is returned as-is.  If the string contains non US-ASCII
   1.591 +     * characters, it is first character-encoded using the specified
   1.592 +     * charset, then transfer-encoded using either the B or Q encoding.
   1.593 +     * The resulting bytes are then returned as a Unicode string
   1.594 +     * containing only ASCII characters. <p>
   1.595 +     *
   1.596 +     * @param   word    unicode string
   1.597 +     * @param   charset the MIME charset
   1.598 +     * @param   encoding the encoding to be used. Currently supported
   1.599 +     *          values are "B" and "Q". If this parameter is null, then
   1.600 +     *          the "Q" encoding is used if most of characters to be
   1.601 +     *          encoded are in the ASCII charset, otherwise "B" encoding
   1.602 +     *          is used.
   1.603 +     * @return  Unicode string containing only US-ASCII characters
   1.604 +     * @exception UnsupportedEncodingException if the encoding fails
   1.605 +     */
   1.606 +    public static String encodeWord(String word, String charset,
   1.607 +                                    String encoding)
   1.608 +                        throws UnsupportedEncodingException {
   1.609 +        return encodeWord(word, charset, encoding, true);
   1.610 +    }
   1.611 +
   1.612 +    /*
   1.613 +     * Encode the given string. The parameter 'encodingWord' should
   1.614 +     * be true if a RFC 822 "word" token is being encoded and false if a
   1.615 +     * RFC 822 "text" token is being encoded. This is because the
   1.616 +     * "Q" encoding defined in RFC 2047 has more restrictions when
   1.617 +     * encoding "word" tokens. (Sigh)
   1.618 +     */
   1.619 +    private static String encodeWord(String string, String charset,
   1.620 +                                     String encoding, boolean encodingWord)
   1.621 +                        throws UnsupportedEncodingException {
   1.622 +
   1.623 +        // If 'string' contains only US-ASCII characters, just
   1.624 +        // return it.
   1.625 +        int ascii = checkAscii(string);
   1.626 +        if (ascii == ALL_ASCII)
   1.627 +            return string;
   1.628 +
   1.629 +        // Else, apply the specified charset conversion.
   1.630 +        String jcharset;
   1.631 +        if (charset == null) { // use default charset
   1.632 +            jcharset = getDefaultJavaCharset(); // the java charset
   1.633 +            charset = getDefaultMIMECharset(); // the MIME equivalent
   1.634 +        } else // MIME charset -> java charset
   1.635 +            jcharset = javaCharset(charset);
   1.636 +
   1.637 +        // If no transfer-encoding is specified, figure one out.
   1.638 +        if (encoding == null) {
   1.639 +            if (ascii != MOSTLY_NONASCII)
   1.640 +                encoding = "Q";
   1.641 +            else
   1.642 +                encoding = "B";
   1.643 +        }
   1.644 +
   1.645 +        boolean b64;
   1.646 +        if (encoding.equalsIgnoreCase("B"))
   1.647 +            b64 = true;
   1.648 +        else if (encoding.equalsIgnoreCase("Q"))
   1.649 +            b64 = false;
   1.650 +        else
   1.651 +            throw new UnsupportedEncodingException(
   1.652 +                        "Unknown transfer encoding: " + encoding);
   1.653 +
   1.654 +        StringBuffer outb = new StringBuffer(); // the output buffer
   1.655 +        doEncode(string, b64, jcharset,
   1.656 +                 // As per RFC 2047, size of an encoded string should not
   1.657 +                 // exceed 75 bytes.
   1.658 +                 // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
   1.659 +                 75 - 7 - charset.length(), // the available space
   1.660 +                 "=?" + charset + "?" + encoding + "?", // prefix
   1.661 +                 true, encodingWord, outb);
   1.662 +
   1.663 +        return outb.toString();
   1.664 +    }
   1.665 +
   1.666 +    private static void doEncode(String string, boolean b64,
   1.667 +                String jcharset, int avail, String prefix,
   1.668 +                boolean first, boolean encodingWord, StringBuffer buf)
   1.669 +                        throws UnsupportedEncodingException {
   1.670 +
   1.671 +        // First find out what the length of the encoded version of
   1.672 +        // 'string' would be.
   1.673 +        byte[] bytes = string.getBytes(jcharset);
   1.674 +        int len;
   1.675 +        if (b64) // "B" encoding
   1.676 +            len = BEncoderStream.encodedLength(bytes);
   1.677 +        else // "Q"
   1.678 +            len = QEncoderStream.encodedLength(bytes, encodingWord);
   1.679 +
   1.680 +        int size;
   1.681 +        if ((len > avail) && ((size = string.length()) > 1)) {
   1.682 +            // If the length is greater than 'avail', split 'string'
   1.683 +            // into two and recurse.
   1.684 +            doEncode(string.substring(0, size/2), b64, jcharset,
   1.685 +                     avail, prefix, first, encodingWord, buf);
   1.686 +            doEncode(string.substring(size/2, size), b64, jcharset,
   1.687 +                     avail, prefix, false, encodingWord, buf);
   1.688 +        } else {
   1.689 +            // length <= than 'avail'. Encode the given string
   1.690 +            ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);
   1.691 +            OutputStream eos; // the encoder
   1.692 +            if (b64) // "B" encoding
   1.693 +                eos = new BEncoderStream(os);
   1.694 +            else // "Q" encoding
   1.695 +                eos = new QEncoderStream(os, encodingWord);
   1.696 +
   1.697 +            try { // do the encoding
   1.698 +                eos.write(bytes);
   1.699 +                eos.close();
   1.700 +            } catch (IOException ioex) { }
   1.701 +
   1.702 +            byte[] encodedBytes = os.toByteArray(); // the encoded stuff
   1.703 +            // Now write out the encoded (all ASCII) bytes into our
   1.704 +            // StringBuffer
   1.705 +            if (!first) // not the first line of this sequence
   1.706 +                if (foldEncodedWords)
   1.707 +                    buf.append("\r\n "); // start a continuation line
   1.708 +                else
   1.709 +                    buf.append(" "); // line will be folded later
   1.710 +
   1.711 +            buf.append(prefix);
   1.712 +            for (int i = 0; i < encodedBytes.length; i++)
   1.713 +                buf.append((char)encodedBytes[i]);
   1.714 +            buf.append("?="); // terminate the current sequence
   1.715 +        }
   1.716 +    }
   1.717 +
   1.718 +    /**
   1.719 +     * The string is parsed using the rules in RFC 2047 for parsing
   1.720 +     * an "encoded-word". If the parse fails, a ParseException is
   1.721 +     * thrown. Otherwise, it is transfer-decoded, and then
   1.722 +     * charset-converted into Unicode. If the charset-conversion
   1.723 +     * fails, an UnsupportedEncodingException is thrown.<p>
   1.724 +     *
   1.725 +     * @param   eword   the possibly encoded value
   1.726 +     * @exception       ParseException if the string is not an
   1.727 +     *                  encoded-word as per RFC 2047.
   1.728 +     * @exception       UnsupportedEncodingException if the charset
   1.729 +     *                  conversion failed.
   1.730 +     */
   1.731 +    public static String decodeWord(String eword)
   1.732 +                throws ParseException, UnsupportedEncodingException {
   1.733 +
   1.734 +        if (!eword.startsWith("=?")) // not an encoded word
   1.735 +            throw new ParseException();
   1.736 +
   1.737 +        // get charset
   1.738 +        int start = 2; int pos;
   1.739 +        if ((pos = eword.indexOf('?', start)) == -1)
   1.740 +            throw new ParseException();
   1.741 +        String charset = javaCharset(eword.substring(start, pos));
   1.742 +
   1.743 +        // get encoding
   1.744 +        start = pos+1;
   1.745 +        if ((pos = eword.indexOf('?', start)) == -1)
   1.746 +            throw new ParseException();
   1.747 +        String encoding = eword.substring(start, pos);
   1.748 +
   1.749 +        // get encoded-sequence
   1.750 +        start = pos+1;
   1.751 +        if ((pos = eword.indexOf("?=", start)) == -1)
   1.752 +            throw new ParseException();
   1.753 +        String word = eword.substring(start, pos);
   1.754 +
   1.755 +        try {
   1.756 +            // Extract the bytes from word
   1.757 +            ByteArrayInputStream bis =
   1.758 +                new ByteArrayInputStream(ASCIIUtility.getBytes(word));
   1.759 +
   1.760 +            // Get the appropriate decoder
   1.761 +            InputStream is;
   1.762 +            if (encoding.equalsIgnoreCase("B"))
   1.763 +                is = new BASE64DecoderStream(bis);
   1.764 +            else if (encoding.equalsIgnoreCase("Q"))
   1.765 +                is = new QDecoderStream(bis);
   1.766 +            else
   1.767 +                throw new UnsupportedEncodingException(
   1.768 +                                "unknown encoding: " + encoding);
   1.769 +
   1.770 +            // For b64 & q, size of decoded word <= size of word. So
   1.771 +            // the decoded bytes must fit into the 'bytes' array. This
   1.772 +            // is certainly more efficient than writing bytes into a
   1.773 +            // ByteArrayOutputStream and then pulling out the byte[]
   1.774 +            // from it.
   1.775 +            int count = bis.available();
   1.776 +            byte[] bytes = new byte[count];
   1.777 +            // count is set to the actual number of decoded bytes
   1.778 +            count = is.read(bytes, 0, count);
   1.779 +
   1.780 +            // Finally, convert the decoded bytes into a String using
   1.781 +            // the specified charset
   1.782 +            String s = new String(bytes, 0, count, charset);
   1.783 +            if (pos + 2 < eword.length()) {
   1.784 +                // there's still more text in the string
   1.785 +                String rest = eword.substring(pos + 2);
   1.786 +                if (!decodeStrict)
   1.787 +                    rest = decodeInnerWords(rest);
   1.788 +                s += rest;
   1.789 +            }
   1.790 +            return s;
   1.791 +        } catch (UnsupportedEncodingException uex) {
   1.792 +            // explicitly catch and rethrow this exception, otherwise
   1.793 +            // the below IOException catch will swallow this up!
   1.794 +            throw uex;
   1.795 +        } catch (IOException ioex) {
   1.796 +            // Shouldn't happen.
   1.797 +            throw new ParseException();
   1.798 +        } catch (IllegalArgumentException iex) {
   1.799 +            /* An unknown charset of the form ISO-XXX-XXX, will cause
   1.800 +             * the JDK to throw an IllegalArgumentException ... Since the
   1.801 +             * JDK will attempt to create a classname using this string,
   1.802 +             * but valid classnames must not contain the character '-',
   1.803 +             * and this results in an IllegalArgumentException, rather than
   1.804 +             * the expected UnsupportedEncodingException. Yikes
   1.805 +             */
   1.806 +            throw new UnsupportedEncodingException();
   1.807 +        }
   1.808 +    }
   1.809 +
   1.810 +    /**
   1.811 +     * Look for encoded words within a word.  The MIME spec doesn't
   1.812 +     * allow this, but many broken mailers, especially Japanese mailers,
   1.813 +     * produce such incorrect encodings.
   1.814 +     */
   1.815 +    private static String decodeInnerWords(String word)
   1.816 +                                throws UnsupportedEncodingException {
   1.817 +        int start = 0, i;
   1.818 +        StringBuffer buf = new StringBuffer();
   1.819 +        while ((i = word.indexOf("=?", start)) >= 0) {
   1.820 +            buf.append(word.substring(start, i));
   1.821 +            int end = word.indexOf("?=", i);
   1.822 +            if (end < 0)
   1.823 +                break;
   1.824 +            String s = word.substring(i, end + 2);
   1.825 +            try {
   1.826 +                s = decodeWord(s);
   1.827 +            } catch (ParseException pex) {
   1.828 +                // ignore it, just use the original string
   1.829 +            }
   1.830 +            buf.append(s);
   1.831 +            start = end + 2;
   1.832 +        }
   1.833 +        if (start == 0)
   1.834 +            return word;
   1.835 +        if (start < word.length())
   1.836 +            buf.append(word.substring(start));
   1.837 +        return buf.toString();
   1.838 +    }
   1.839 +
   1.840 +    /**
   1.841 +     * A utility method to quote a word, if the word contains any
   1.842 +     * characters from the specified 'specials' list.<p>
   1.843 +     *
   1.844 +     * The <code>HeaderTokenizer</code> class defines two special
   1.845 +     * sets of delimiters - MIME and RFC 822. <p>
   1.846 +     *
   1.847 +     * This method is typically used during the generation of
   1.848 +     * RFC 822 and MIME header fields.
   1.849 +     *
   1.850 +     * @param   word    word to be quoted
   1.851 +     * @param   specials the set of special characters
   1.852 +     * @return          the possibly quoted word
   1.853 +     * @see     javax.mail.internet.HeaderTokenizer#MIME
   1.854 +     * @see     javax.mail.internet.HeaderTokenizer#RFC822
   1.855 +     */
   1.856 +    public static String quote(String word, String specials) {
   1.857 +        int len = word.length();
   1.858 +
   1.859 +        /*
   1.860 +         * Look for any "bad" characters, Escape and
   1.861 +         *  quote the entire string if necessary.
   1.862 +         */
   1.863 +        boolean needQuoting = false;
   1.864 +        for (int i = 0; i < len; i++) {
   1.865 +            char c = word.charAt(i);
   1.866 +            if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
   1.867 +                // need to escape them and then quote the whole string
   1.868 +                StringBuffer sb = new StringBuffer(len + 3);
   1.869 +                sb.append('"');
   1.870 +                sb.append(word.substring(0, i));
   1.871 +                int lastc = 0;
   1.872 +                for (int j = i; j < len; j++) {
   1.873 +                    char cc = word.charAt(j);
   1.874 +                    if ((cc == '"') || (cc == '\\') ||
   1.875 +                        (cc == '\r') || (cc == '\n'))
   1.876 +                        if (cc == '\n' && lastc == '\r')
   1.877 +                            ;   // do nothing, CR was already escaped
   1.878 +                        else
   1.879 +                            sb.append('\\');    // Escape the character
   1.880 +                    sb.append(cc);
   1.881 +                    lastc = cc;
   1.882 +                }
   1.883 +                sb.append('"');
   1.884 +                return sb.toString();
   1.885 +            } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
   1.886 +                // These characters cause the string to be quoted
   1.887 +                needQuoting = true;
   1.888 +        }
   1.889 +
   1.890 +        if (needQuoting) {
   1.891 +            StringBuffer sb = new StringBuffer(len + 2);
   1.892 +            sb.append('"').append(word).append('"');
   1.893 +            return sb.toString();
   1.894 +        } else
   1.895 +            return word;
   1.896 +    }
   1.897 +
   1.898 +    /**
   1.899 +     * Fold a string at linear whitespace so that each line is no longer
   1.900 +     * than 76 characters, if possible.  If there are more than 76
   1.901 +     * non-whitespace characters consecutively, the string is folded at
   1.902 +     * the first whitespace after that sequence.  The parameter
   1.903 +     * <code>used</code> indicates how many characters have been used in
   1.904 +     * the current line; it is usually the length of the header name. <p>
   1.905 +     *
   1.906 +     * Note that line breaks in the string aren't escaped; they probably
   1.907 +     * should be.
   1.908 +     *
   1.909 +     * @param   used    characters used in line so far
   1.910 +     * @param   s       the string to fold
   1.911 +     * @return          the folded string
   1.912 +     */
   1.913 +    /*public*/ static String fold(int used, String s) {
   1.914 +        if (!foldText)
   1.915 +            return s;
   1.916 +
   1.917 +        int end;
   1.918 +        char c;
   1.919 +        // Strip trailing spaces
   1.920 +        for (end = s.length() - 1; end >= 0; end--) {
   1.921 +            c = s.charAt(end);
   1.922 +            if (c != ' ' && c != '\t')
   1.923 +                break;
   1.924 +        }
   1.925 +        if (end != s.length() - 1)
   1.926 +            s = s.substring(0, end + 1);
   1.927 +
   1.928 +        // if the string fits now, just return it
   1.929 +        if (used + s.length() <= 76)
   1.930 +            return s;
   1.931 +
   1.932 +        // have to actually fold the string
   1.933 +        StringBuffer sb = new StringBuffer(s.length() + 4);
   1.934 +        char lastc = 0;
   1.935 +        while (used + s.length() > 76) {
   1.936 +            int lastspace = -1;
   1.937 +            for (int i = 0; i < s.length(); i++) {
   1.938 +                if (lastspace != -1 && used + i > 76)
   1.939 +                    break;
   1.940 +                c = s.charAt(i);
   1.941 +                if (c == ' ' || c == '\t')
   1.942 +                    if (!(lastc == ' ' || lastc == '\t'))
   1.943 +                        lastspace = i;
   1.944 +                lastc = c;
   1.945 +            }
   1.946 +            if (lastspace == -1) {
   1.947 +                // no space, use the whole thing
   1.948 +                sb.append(s);
   1.949 +                s = "";
   1.950 +                used = 0;
   1.951 +                break;
   1.952 +            }
   1.953 +            sb.append(s.substring(0, lastspace));
   1.954 +            sb.append("\r\n");
   1.955 +            lastc = s.charAt(lastspace);
   1.956 +            sb.append(lastc);
   1.957 +            s = s.substring(lastspace + 1);
   1.958 +            used = 1;
   1.959 +        }
   1.960 +        sb.append(s);
   1.961 +        return sb.toString();
   1.962 +    }
   1.963 +
   1.964 +    /**
   1.965 +     * Unfold a folded header.  Any line breaks that aren't escaped and
   1.966 +     * are followed by whitespace are removed.
   1.967 +     *
   1.968 +     * @param   s       the string to unfold
   1.969 +     * @return          the unfolded string
   1.970 +     */
   1.971 +    /*public*/ static String unfold(String s) {
   1.972 +        if (!foldText)
   1.973 +            return s;
   1.974 +
   1.975 +        StringBuffer sb = null;
   1.976 +        int i;
   1.977 +        while ((i = indexOfAny(s, "\r\n")) >= 0) {
   1.978 +            int start = i;
   1.979 +            int l = s.length();
   1.980 +            i++;                // skip CR or NL
   1.981 +            if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
   1.982 +                i++;    // skip LF
   1.983 +            if (start == 0 || s.charAt(start - 1) != '\\') {
   1.984 +                char c;
   1.985 +                // if next line starts with whitespace, skip all of it
   1.986 +                // XXX - always has to be true?
   1.987 +                if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
   1.988 +                    i++;        // skip whitespace
   1.989 +                    while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))
   1.990 +                        i++;
   1.991 +                    if (sb == null)
   1.992 +                        sb = new StringBuffer(s.length());
   1.993 +                    if (start != 0) {
   1.994 +                        sb.append(s.substring(0, start));
   1.995 +                        sb.append(' ');
   1.996 +                    }
   1.997 +                    s = s.substring(i);
   1.998 +                    continue;
   1.999 +                }
  1.1000 +                // it's not a continuation line, just leave it in
  1.1001 +                if (sb == null)
  1.1002 +                    sb = new StringBuffer(s.length());
  1.1003 +                sb.append(s.substring(0, i));
  1.1004 +                s = s.substring(i);
  1.1005 +            } else {
  1.1006 +                // there's a backslash at "start - 1"
  1.1007 +                // strip it out, but leave in the line break
  1.1008 +                if (sb == null)
  1.1009 +                    sb = new StringBuffer(s.length());
  1.1010 +                sb.append(s.substring(0, start - 1));
  1.1011 +                sb.append(s.substring(start, i));
  1.1012 +                s = s.substring(i);
  1.1013 +            }
  1.1014 +        }
  1.1015 +        if (sb != null) {
  1.1016 +            sb.append(s);
  1.1017 +            return sb.toString();
  1.1018 +        } else
  1.1019 +            return s;
  1.1020 +    }
  1.1021 +
  1.1022 +    /**
  1.1023 +     * Return the first index of any of the characters in "any" in "s",
  1.1024 +     * or -1 if none are found.
  1.1025 +     *
  1.1026 +     * This should be a method on String.
  1.1027 +     */
  1.1028 +    private static int indexOfAny(String s, String any) {
  1.1029 +        return indexOfAny(s, any, 0);
  1.1030 +    }
  1.1031 +
  1.1032 +    private static int indexOfAny(String s, String any, int start) {
  1.1033 +        try {
  1.1034 +            int len = s.length();
  1.1035 +            for (int i = start; i < len; i++) {
  1.1036 +                if (any.indexOf(s.charAt(i)) >= 0)
  1.1037 +                    return i;
  1.1038 +            }
  1.1039 +            return -1;
  1.1040 +        } catch (StringIndexOutOfBoundsException e) {
  1.1041 +            return -1;
  1.1042 +        }
  1.1043 +    }
  1.1044 +
  1.1045 +    /**
  1.1046 +     * Convert a MIME charset name into a valid Java charset name. <p>
  1.1047 +     *
  1.1048 +     * @param charset   the MIME charset name
  1.1049 +     * @return  the Java charset equivalent. If a suitable mapping is
  1.1050 +     *          not available, the passed in charset is itself returned.
  1.1051 +     */
  1.1052 +    public static String javaCharset(String charset) {
  1.1053 +        if (mime2java == null || charset == null)
  1.1054 +            // no mapping table, or charset parameter is null
  1.1055 +            return charset;
  1.1056 +
  1.1057 +        String alias = (String)mime2java.get(charset.toLowerCase());
  1.1058 +        return alias == null ? charset : alias;
  1.1059 +    }
  1.1060 +
  1.1061 +    /**
  1.1062 +     * Convert a java charset into its MIME charset name. <p>
  1.1063 +     *
  1.1064 +     * Note that a future version of JDK (post 1.2) might provide
  1.1065 +     * this functionality, in which case, we may deprecate this
  1.1066 +     * method then.
  1.1067 +     *
  1.1068 +     * @param   charset    the JDK charset
  1.1069 +     * @return          the MIME/IANA equivalent. If a mapping
  1.1070 +     *                  is not possible, the passed in charset itself
  1.1071 +     *                  is returned.
  1.1072 +     * @since           JavaMail 1.1
  1.1073 +     */
  1.1074 +    public static String mimeCharset(String charset) {
  1.1075 +        if (java2mime == null || charset == null)
  1.1076 +            // no mapping table or charset param is null
  1.1077 +            return charset;
  1.1078 +
  1.1079 +        String alias = (String)java2mime.get(charset.toLowerCase());
  1.1080 +        return alias == null ? charset : alias;
  1.1081 +    }
  1.1082 +
  1.1083 +    private static String defaultJavaCharset;
  1.1084 +    private static String defaultMIMECharset;
  1.1085 +
  1.1086 +    /**
  1.1087 +     * Get the default charset corresponding to the system's current
  1.1088 +     * default locale.  If the System property <code>mail.mime.charset</code>
  1.1089 +     * is set, a system charset corresponding to this MIME charset will be
  1.1090 +     * returned. <p>
  1.1091 +     *
  1.1092 +     * @return  the default charset of the system's default locale,
  1.1093 +     *          as a Java charset. (NOT a MIME charset)
  1.1094 +     * @since   JavaMail 1.1
  1.1095 +     */
  1.1096 +    public static String getDefaultJavaCharset() {
  1.1097 +        if (defaultJavaCharset == null) {
  1.1098 +            /*
  1.1099 +             * If mail.mime.charset is set, it controls the default
  1.1100 +             * Java charset as well.
  1.1101 +             */
  1.1102 +            String mimecs = null;
  1.1103 +
  1.1104 +            mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
  1.1105 +
  1.1106 +            if (mimecs != null && mimecs.length() > 0) {
  1.1107 +                defaultJavaCharset = javaCharset(mimecs);
  1.1108 +                return defaultJavaCharset;
  1.1109 +            }
  1.1110 +
  1.1111 +            try {
  1.1112 +                defaultJavaCharset = System.getProperty("file.encoding",
  1.1113 +                                                        "8859_1");
  1.1114 +            } catch (SecurityException sex) {
  1.1115 +
  1.1116 +                class NullInputStream extends InputStream {
  1.1117 +                    public int read() {
  1.1118 +                        return 0;
  1.1119 +                    }
  1.1120 +                }
  1.1121 +                InputStreamReader reader =
  1.1122 +                        new InputStreamReader(new NullInputStream());
  1.1123 +                defaultJavaCharset = reader.getEncoding();
  1.1124 +                if (defaultJavaCharset == null)
  1.1125 +                    defaultJavaCharset = "8859_1";
  1.1126 +            }
  1.1127 +        }
  1.1128 +
  1.1129 +        return defaultJavaCharset;
  1.1130 +    }
  1.1131 +
  1.1132 +    /*
  1.1133 +     * Get the default MIME charset for this locale.
  1.1134 +     */
  1.1135 +    static String getDefaultMIMECharset() {
  1.1136 +        if (defaultMIMECharset == null) {
  1.1137 +                defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
  1.1138 +        }
  1.1139 +        if (defaultMIMECharset == null)
  1.1140 +            defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
  1.1141 +        return defaultMIMECharset;
  1.1142 +    }
  1.1143 +
  1.1144 +    // Tables to map MIME charset names to Java names and vice versa.
  1.1145 +    // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
  1.1146 +    private static Hashtable mime2java;
  1.1147 +    private static Hashtable java2mime;
  1.1148 +
  1.1149 +    static {
  1.1150 +        java2mime = new Hashtable(40);
  1.1151 +        mime2java = new Hashtable(10);
  1.1152 +
  1.1153 +        try {
  1.1154 +            // Use this class's classloader to load the mapping file
  1.1155 +            // XXX - we should use SecuritySupport, but it's in another package
  1.1156 +            InputStream is =
  1.1157 +                    com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
  1.1158 +                    "/META-INF/javamail.charset.map");
  1.1159 +
  1.1160 +            if (is != null) {
  1.1161 +                is = new LineInputStream(is);
  1.1162 +
  1.1163 +                // Load the JDK-to-MIME charset mapping table
  1.1164 +                loadMappings((LineInputStream)is, java2mime);
  1.1165 +
  1.1166 +                // Load the MIME-to-JDK charset mapping table
  1.1167 +                loadMappings((LineInputStream)is, mime2java);
  1.1168 +            }
  1.1169 +        } catch (Exception ex) { }
  1.1170 +
  1.1171 +        // If we didn't load the tables, e.g., because we didn't have
  1.1172 +        // permission, load them manually.  The entries here should be
  1.1173 +        // the same as the default javamail.charset.map.
  1.1174 +        if (java2mime.isEmpty()) {
  1.1175 +            java2mime.put("8859_1", "ISO-8859-1");
  1.1176 +            java2mime.put("iso8859_1", "ISO-8859-1");
  1.1177 +            java2mime.put("ISO8859-1", "ISO-8859-1");
  1.1178 +
  1.1179 +            java2mime.put("8859_2", "ISO-8859-2");
  1.1180 +            java2mime.put("iso8859_2", "ISO-8859-2");
  1.1181 +            java2mime.put("ISO8859-2", "ISO-8859-2");
  1.1182 +
  1.1183 +            java2mime.put("8859_3", "ISO-8859-3");
  1.1184 +            java2mime.put("iso8859_3", "ISO-8859-3");
  1.1185 +            java2mime.put("ISO8859-3", "ISO-8859-3");
  1.1186 +
  1.1187 +            java2mime.put("8859_4", "ISO-8859-4");
  1.1188 +            java2mime.put("iso8859_4", "ISO-8859-4");
  1.1189 +            java2mime.put("ISO8859-4", "ISO-8859-4");
  1.1190 +
  1.1191 +            java2mime.put("8859_5", "ISO-8859-5");
  1.1192 +            java2mime.put("iso8859_5", "ISO-8859-5");
  1.1193 +            java2mime.put("ISO8859-5", "ISO-8859-5");
  1.1194 +
  1.1195 +            java2mime.put("8859_6", "ISO-8859-6");
  1.1196 +            java2mime.put("iso8859_6", "ISO-8859-6");
  1.1197 +            java2mime.put("ISO8859-6", "ISO-8859-6");
  1.1198 +
  1.1199 +            java2mime.put("8859_7", "ISO-8859-7");
  1.1200 +            java2mime.put("iso8859_7", "ISO-8859-7");
  1.1201 +            java2mime.put("ISO8859-7", "ISO-8859-7");
  1.1202 +
  1.1203 +            java2mime.put("8859_8", "ISO-8859-8");
  1.1204 +            java2mime.put("iso8859_8", "ISO-8859-8");
  1.1205 +            java2mime.put("ISO8859-8", "ISO-8859-8");
  1.1206 +
  1.1207 +            java2mime.put("8859_9", "ISO-8859-9");
  1.1208 +            java2mime.put("iso8859_9", "ISO-8859-9");
  1.1209 +            java2mime.put("ISO8859-9", "ISO-8859-9");
  1.1210 +
  1.1211 +            java2mime.put("SJIS", "Shift_JIS");
  1.1212 +            java2mime.put("MS932", "Shift_JIS");
  1.1213 +            java2mime.put("JIS", "ISO-2022-JP");
  1.1214 +            java2mime.put("ISO2022JP", "ISO-2022-JP");
  1.1215 +            java2mime.put("EUC_JP", "euc-jp");
  1.1216 +            java2mime.put("KOI8_R", "koi8-r");
  1.1217 +            java2mime.put("EUC_CN", "euc-cn");
  1.1218 +            java2mime.put("EUC_TW", "euc-tw");
  1.1219 +            java2mime.put("EUC_KR", "euc-kr");
  1.1220 +        }
  1.1221 +        if (mime2java.isEmpty()) {
  1.1222 +            mime2java.put("iso-2022-cn", "ISO2022CN");
  1.1223 +            mime2java.put("iso-2022-kr", "ISO2022KR");
  1.1224 +            mime2java.put("utf-8", "UTF8");
  1.1225 +            mime2java.put("utf8", "UTF8");
  1.1226 +            mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
  1.1227 +            mime2java.put("ja_jp.eucjp", "EUCJIS");
  1.1228 +            mime2java.put("euc-kr", "KSC5601");
  1.1229 +            mime2java.put("euckr", "KSC5601");
  1.1230 +            mime2java.put("us-ascii", "ISO-8859-1");
  1.1231 +            mime2java.put("x-us-ascii", "ISO-8859-1");
  1.1232 +        }
  1.1233 +    }
  1.1234 +
  1.1235 +    private static void loadMappings(LineInputStream is, Hashtable table) {
  1.1236 +        String currLine;
  1.1237 +
  1.1238 +        while (true) {
  1.1239 +            try {
  1.1240 +                currLine = is.readLine();
  1.1241 +            } catch (IOException ioex) {
  1.1242 +                break; // error in reading, stop
  1.1243 +            }
  1.1244 +
  1.1245 +            if (currLine == null) // end of file, stop
  1.1246 +                break;
  1.1247 +            if (currLine.startsWith("--") && currLine.endsWith("--"))
  1.1248 +                // end of this table
  1.1249 +                break;
  1.1250 +
  1.1251 +            // ignore empty lines and comments
  1.1252 +            if (currLine.trim().length() == 0 || currLine.startsWith("#"))
  1.1253 +                continue;
  1.1254 +
  1.1255 +            // A valid entry is of the form <key><separator><value>
  1.1256 +            // where, <separator> := SPACE | HT. Parse this
  1.1257 +            StringTokenizer tk = new StringTokenizer(currLine, " \t");
  1.1258 +            try {
  1.1259 +                String key = tk.nextToken();
  1.1260 +                String value = tk.nextToken();
  1.1261 +                table.put(key.toLowerCase(), value);
  1.1262 +            } catch (NoSuchElementException nex) { }
  1.1263 +        }
  1.1264 +    }
  1.1265 +
  1.1266 +    static final int ALL_ASCII          = 1;
  1.1267 +    static final int MOSTLY_ASCII       = 2;
  1.1268 +    static final int MOSTLY_NONASCII    = 3;
  1.1269 +
  1.1270 +    /**
  1.1271 +     * Check if the given string contains non US-ASCII characters.
  1.1272 +     * @param   s       string
  1.1273 +     * @return          ALL_ASCII if all characters in the string
  1.1274 +     *                  belong to the US-ASCII charset. MOSTLY_ASCII
  1.1275 +     *                  if more than half of the available characters
  1.1276 +     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
  1.1277 +     */
  1.1278 +    static int checkAscii(String s) {
  1.1279 +        int ascii = 0, non_ascii = 0;
  1.1280 +        int l = s.length();
  1.1281 +
  1.1282 +        for (int i = 0; i < l; i++) {
  1.1283 +            if (nonascii((int)s.charAt(i))) // non-ascii
  1.1284 +                non_ascii++;
  1.1285 +            else
  1.1286 +                ascii++;
  1.1287 +        }
  1.1288 +
  1.1289 +        if (non_ascii == 0)
  1.1290 +            return ALL_ASCII;
  1.1291 +        if (ascii > non_ascii)
  1.1292 +            return MOSTLY_ASCII;
  1.1293 +
  1.1294 +        return MOSTLY_NONASCII;
  1.1295 +    }
  1.1296 +
  1.1297 +    /**
  1.1298 +     * Check if the given byte array contains non US-ASCII characters.
  1.1299 +     * @param   b       byte array
  1.1300 +     * @return          ALL_ASCII if all characters in the string
  1.1301 +     *                  belong to the US-ASCII charset. MOSTLY_ASCII
  1.1302 +     *                  if more than half of the available characters
  1.1303 +     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
  1.1304 +     *
  1.1305 +     * XXX - this method is no longer used
  1.1306 +     */
  1.1307 +    static int checkAscii(byte[] b) {
  1.1308 +        int ascii = 0, non_ascii = 0;
  1.1309 +
  1.1310 +        for (int i=0; i < b.length; i++) {
  1.1311 +            // The '&' operator automatically causes b[i] to be promoted
  1.1312 +            // to an int, and we mask out the higher bytes in the int
  1.1313 +            // so that the resulting value is not a negative integer.
  1.1314 +            if (nonascii(b[i] & 0xff)) // non-ascii
  1.1315 +                non_ascii++;
  1.1316 +            else
  1.1317 +                ascii++;
  1.1318 +        }
  1.1319 +
  1.1320 +        if (non_ascii == 0)
  1.1321 +            return ALL_ASCII;
  1.1322 +        if (ascii > non_ascii)
  1.1323 +            return MOSTLY_ASCII;
  1.1324 +
  1.1325 +        return MOSTLY_NONASCII;
  1.1326 +    }
  1.1327 +
  1.1328 +    /**
  1.1329 +     * Check if the given input stream contains non US-ASCII characters.
  1.1330 +     * Upto <code>max</code> bytes are checked. If <code>max</code> is
  1.1331 +     * set to <code>ALL</code>, then all the bytes available in this
  1.1332 +     * input stream are checked. If <code>breakOnNonAscii</code> is true
  1.1333 +     * the check terminates when the first non-US-ASCII character is
  1.1334 +     * found and MOSTLY_NONASCII is returned. Else, the check continues
  1.1335 +     * till <code>max</code> bytes or till the end of stream.
  1.1336 +     *
  1.1337 +     * @param   is      the input stream
  1.1338 +     * @param   max     maximum bytes to check for. The special value
  1.1339 +     *                  ALL indicates that all the bytes in this input
  1.1340 +     *                  stream must be checked.
  1.1341 +     * @param   breakOnNonAscii if <code>true</code>, then terminate the
  1.1342 +     *                  the check when the first non-US-ASCII character
  1.1343 +     *                  is found.
  1.1344 +     * @return          ALL_ASCII if all characters in the string
  1.1345 +     *                  belong to the US-ASCII charset. MOSTLY_ASCII
  1.1346 +     *                  if more than half of the available characters
  1.1347 +     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
  1.1348 +     */
  1.1349 +    static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
  1.1350 +        int ascii = 0, non_ascii = 0;
  1.1351 +        int len;
  1.1352 +        int block = 4096;
  1.1353 +        int linelen = 0;
  1.1354 +        boolean longLine = false, badEOL = false;
  1.1355 +        boolean checkEOL = encodeEolStrict && breakOnNonAscii;
  1.1356 +        byte buf[] = null;
  1.1357 +        if (max != 0) {
  1.1358 +            block = (max == ALL) ? 4096 : Math.min(max, 4096);
  1.1359 +            buf = new byte[block];
  1.1360 +        }
  1.1361 +        while (max != 0) {
  1.1362 +            try {
  1.1363 +                if ((len = is.read(buf, 0, block)) == -1)
  1.1364 +                    break;
  1.1365 +                int lastb = 0;
  1.1366 +                for (int i = 0; i < len; i++) {
  1.1367 +                    // The '&' operator automatically causes b[i] to
  1.1368 +                    // be promoted to an int, and we mask out the higher
  1.1369 +                    // bytes in the int so that the resulting value is
  1.1370 +                    // not a negative integer.
  1.1371 +                    int b = buf[i] & 0xff;
  1.1372 +                    if (checkEOL &&
  1.1373 +                            ((lastb == '\r' && b != '\n') ||
  1.1374 +                            (lastb != '\r' && b == '\n')))
  1.1375 +                        badEOL = true;
  1.1376 +                    if (b == '\r' || b == '\n')
  1.1377 +                        linelen = 0;
  1.1378 +                    else {
  1.1379 +                        linelen++;
  1.1380 +                        if (linelen > 998)      // 1000 - CRLF
  1.1381 +                            longLine = true;
  1.1382 +                    }
  1.1383 +                    if (nonascii(b)) {  // non-ascii
  1.1384 +                        if (breakOnNonAscii) // we are done
  1.1385 +                            return MOSTLY_NONASCII;
  1.1386 +                        else
  1.1387 +                            non_ascii++;
  1.1388 +                    } else
  1.1389 +                        ascii++;
  1.1390 +                    lastb = b;
  1.1391 +                }
  1.1392 +            } catch (IOException ioex) {
  1.1393 +                break;
  1.1394 +            }
  1.1395 +            if (max != ALL)
  1.1396 +                max -= len;
  1.1397 +        }
  1.1398 +
  1.1399 +        if (max == 0 && breakOnNonAscii)
  1.1400 +            // We have been told to break on the first non-ascii character.
  1.1401 +            // We haven't got any non-ascii character yet, but then we
  1.1402 +            // have not checked all of the available bytes either. So we
  1.1403 +            // cannot say for sure that this input stream is ALL_ASCII,
  1.1404 +            // and hence we must play safe and return MOSTLY_NONASCII
  1.1405 +
  1.1406 +            return MOSTLY_NONASCII;
  1.1407 +
  1.1408 +        if (non_ascii == 0) { // no non-us-ascii characters so far
  1.1409 +            // If we're looking at non-text data, and we saw CR without LF
  1.1410 +            // or vice versa, consider this mostly non-ASCII so that it
  1.1411 +            // will be base64 encoded (since the quoted-printable encoder
  1.1412 +            // doesn't encode this case properly).
  1.1413 +            if (badEOL)
  1.1414 +                return MOSTLY_NONASCII;
  1.1415 +            // if we've seen a long line, we degrade to mostly ascii
  1.1416 +            else if (longLine)
  1.1417 +                return MOSTLY_ASCII;
  1.1418 +            else
  1.1419 +                return ALL_ASCII;
  1.1420 +        }
  1.1421 +        if (ascii > non_ascii) // mostly ascii
  1.1422 +            return MOSTLY_ASCII;
  1.1423 +        return MOSTLY_NONASCII;
  1.1424 +    }
  1.1425 +
  1.1426 +    static final boolean nonascii(int b) {
  1.1427 +        return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
  1.1428 +    }
  1.1429 +}
  1.1430 +
  1.1431 +/**
  1.1432 + * An OutputStream that determines whether the data written to
  1.1433 + * it is all ASCII, mostly ASCII, or mostly non-ASCII.
  1.1434 + */
  1.1435 +class AsciiOutputStream extends OutputStream {
  1.1436 +    private boolean breakOnNonAscii;
  1.1437 +    private int ascii = 0, non_ascii = 0;
  1.1438 +    private int linelen = 0;
  1.1439 +    private boolean longLine = false;
  1.1440 +    private boolean badEOL = false;
  1.1441 +    private boolean checkEOL = false;
  1.1442 +    private int lastb = 0;
  1.1443 +    private int ret = 0;
  1.1444 +
  1.1445 +    public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
  1.1446 +        this.breakOnNonAscii = breakOnNonAscii;
  1.1447 +        checkEOL = encodeEolStrict && breakOnNonAscii;
  1.1448 +    }
  1.1449 +
  1.1450 +    public void write(int b) throws IOException {
  1.1451 +        check(b);
  1.1452 +    }
  1.1453 +
  1.1454 +    public void write(byte b[]) throws IOException {
  1.1455 +        write(b, 0, b.length);
  1.1456 +    }
  1.1457 +
  1.1458 +    public void write(byte b[], int off, int len) throws IOException {
  1.1459 +        len += off;
  1.1460 +        for (int i = off; i < len ; i++)
  1.1461 +            check(b[i]);
  1.1462 +    }
  1.1463 +
  1.1464 +    private final void check(int b) throws IOException {
  1.1465 +        b &= 0xff;
  1.1466 +        if (checkEOL &&
  1.1467 +                ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
  1.1468 +            badEOL = true;
  1.1469 +        if (b == '\r' || b == '\n')
  1.1470 +            linelen = 0;
  1.1471 +        else {
  1.1472 +            linelen++;
  1.1473 +            if (linelen > 998)  // 1000 - CRLF
  1.1474 +                longLine = true;
  1.1475 +        }
  1.1476 +        if (MimeUtility.nonascii(b)) { // non-ascii
  1.1477 +            non_ascii++;
  1.1478 +            if (breakOnNonAscii) {      // we are done
  1.1479 +                ret = MimeUtility.MOSTLY_NONASCII;
  1.1480 +                throw new EOFException();
  1.1481 +            }
  1.1482 +        } else
  1.1483 +            ascii++;
  1.1484 +        lastb = b;
  1.1485 +    }
  1.1486 +
  1.1487 +    /**
  1.1488 +     * Return ASCII-ness of data stream.
  1.1489 +     */
  1.1490 +    public int getAscii() {
  1.1491 +        if (ret != 0)
  1.1492 +            return ret;
  1.1493 +        // If we're looking at non-text data, and we saw CR without LF
  1.1494 +        // or vice versa, consider this mostly non-ASCII so that it
  1.1495 +        // will be base64 encoded (since the quoted-printable encoder
  1.1496 +        // doesn't encode this case properly).
  1.1497 +        if (badEOL)
  1.1498 +            return MimeUtility.MOSTLY_NONASCII;
  1.1499 +        else if (non_ascii == 0) { // no non-us-ascii characters so far
  1.1500 +            // if we've seen a long line, we degrade to mostly ascii
  1.1501 +            if (longLine)
  1.1502 +                return MimeUtility.MOSTLY_ASCII;
  1.1503 +            else
  1.1504 +                return MimeUtility.ALL_ASCII;
  1.1505 +        }
  1.1506 +        if (ascii > non_ascii) // mostly ascii
  1.1507 +            return MimeUtility.MOSTLY_ASCII;
  1.1508 +        return MimeUtility.MOSTLY_NONASCII;
  1.1509 +    }
  1.1510 +}

mercurial