aoqi@0: /* aoqi@0: * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. 
aoqi@0: */ aoqi@0: aoqi@0: /* aoqi@0: * @(#)MimeUtility.java 1.45 03/03/10 aoqi@0: */ aoqi@0: aoqi@0: aoqi@0: aoqi@0: package com.sun.xml.internal.messaging.saaj.packaging.mime.internet; aoqi@0: aoqi@0: import java.io.*; aoqi@0: import java.util.*; aoqi@0: aoqi@0: import javax.activation.DataHandler; aoqi@0: import javax.activation.DataSource; aoqi@0: aoqi@0: import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException; aoqi@0: import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*; aoqi@0: import com.sun.xml.internal.messaging.saaj.util.SAAJUtil; aoqi@0: aoqi@0: /** aoqi@0: * This is a utility class that provides various MIME related aoqi@0: * functionality.

aoqi@0: * aoqi@0: * There are a set of methods to encode and decode MIME headers as aoqi@0: * per RFC 2047. A brief description on handling such headers is aoqi@0: * given below:

aoqi@0: * aoqi@0: * RFC 822 mail headers must contain only US-ASCII aoqi@0: * characters. Headers that contain non US-ASCII characters must be aoqi@0: * encoded so that they contain only US-ASCII characters. Basically, aoqi@0: * this process involves using either BASE64 or QP to encode certain aoqi@0: * characters. RFC 2047 describes this in detail.

aoqi@0: * aoqi@0: * In Java, Strings contain (16 bit) Unicode characters. ASCII is a aoqi@0: * subset of Unicode (and occupies the range 0 - 127). A String aoqi@0: * that contains only ASCII characters is already mail-safe. If the aoqi@0: * String contains non US-ASCII characters, it must be encoded. An aoqi@0: * additional complexity in this step is that since Unicode is not aoqi@0: * yet a widely used charset, one might want to first charset-encode aoqi@0: * the String into another charset and then do the transfer-encoding. aoqi@0: *

aoqi@0: * Note that to get the actual bytes of a mail-safe String (say, aoqi@0: * for sending over SMTP), one must do aoqi@0: *

aoqi@0:  *
aoqi@0:  *      byte[] bytes = string.getBytes("iso-8859-1");
aoqi@0:  *
aoqi@0:  *

 *
 * The setHeader and addHeader methods
 * on MimeMessage and MimeBodyPart assume that the given header values
 * are Unicode strings that contain only US-ASCII characters. Hence
 * the callers of those methods must ensure that the values they pass
 * do not contain non US-ASCII characters. The methods in this class
 * help do this.

aoqi@0: * aoqi@0: * The getHeader family of methods on MimeMessage and aoqi@0: * MimeBodyPart return the raw header value. These might be encoded aoqi@0: * as per RFC 2047, and if so, must be decoded into Unicode Strings. aoqi@0: * The methods in this class help to do this.

aoqi@0: * aoqi@0: * Several System properties control strict conformance to the MIME aoqi@0: * spec. Note that these are not session properties but must be set aoqi@0: * globally as System properties.

aoqi@0: * aoqi@0: * The mail.mime.decodetext.strict property controls aoqi@0: * decoding of MIME encoded words. The MIME spec requires that encoded aoqi@0: * words start at the beginning of a whitespace separated word. Some aoqi@0: * mailers incorrectly include encoded words in the middle of a word. aoqi@0: * If the mail.mime.decodetext.strict System property is aoqi@0: * set to "false", an attempt will be made to decode these aoqi@0: * illegal encoded words. The default is true.

 *
 * The mail.mime.encodeeol.strict property controls the
 * choice of Content-Transfer-Encoding for MIME parts that are not of
 * type "text". Often such parts will contain textual data for which
 * an encoding that allows normal end of line conventions is appropriate.
 * In rare cases, such a part will appear to contain entirely textual
 * data, but will require an encoding that preserves CR and LF characters
 * without change. If the mail.mime.encodeeol.strict
 * System property is set to "true", such an encoding will
 * be used when necessary. The default is false.

aoqi@0: * aoqi@0: * In addition, the mail.mime.charset System property can aoqi@0: * be used to specify the default MIME charset to use for encoded words aoqi@0: * and text parts that don't otherwise specify a charset. Normally, the aoqi@0: * default MIME charset is derived from the default Java charset, as aoqi@0: * specified in the file.encoding System property. Most aoqi@0: * applications will have no need to explicitly set the default MIME aoqi@0: * charset. In cases where the default MIME charset to be used for aoqi@0: * mail messages is different than the charset used for files stored on aoqi@0: * the system, this property should be set. aoqi@0: * aoqi@0: * @version 1.45, 03/03/10 aoqi@0: * @author John Mani aoqi@0: * @author Bill Shannon aoqi@0: */ aoqi@0: aoqi@0: public class MimeUtility { aoqi@0: aoqi@0: // This class cannot be instantiated aoqi@0: private MimeUtility() { } aoqi@0: aoqi@0: public static final int ALL = -1; aoqi@0: aoqi@0: private static final int BUFFER_SIZE = 1024; aoqi@0: private static boolean decodeStrict = true; aoqi@0: private static boolean encodeEolStrict = false; aoqi@0: private static boolean foldEncodedWords = false; aoqi@0: private static boolean foldText = true; aoqi@0: aoqi@0: static { aoqi@0: try { aoqi@0: String s = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict"); aoqi@0: // default to true aoqi@0: decodeStrict = s == null || !s.equalsIgnoreCase("false"); aoqi@0: s = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict"); aoqi@0: // default to false aoqi@0: encodeEolStrict = s != null && s.equalsIgnoreCase("true"); aoqi@0: s = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords"); aoqi@0: // default to false aoqi@0: foldEncodedWords = s != null && s.equalsIgnoreCase("true"); aoqi@0: s = SAAJUtil.getSystemProperty("mail.mime.foldtext"); aoqi@0: // default to true aoqi@0: foldText = s == null || !s.equalsIgnoreCase("false"); aoqi@0: } catch (SecurityException sex) { aoqi@0: // ignore it aoqi@0: } aoqi@0: } 
aoqi@0: aoqi@0: aoqi@0: /** aoqi@0: * Get the content-transfer-encoding that should be applied aoqi@0: * to the input stream of this datasource, to make it mailsafe.

aoqi@0: * aoqi@0: * The algorithm used here is:
aoqi@0: *

aoqi@0: * If the primary type of this datasource is "text" and if all aoqi@0: * the bytes in its input stream are US-ASCII, then the encoding aoqi@0: * is "7bit". If more than half of the bytes are non-US-ASCII, then aoqi@0: * the encoding is "base64". If less than half of the bytes are aoqi@0: * non-US-ASCII, then the encoding is "quoted-printable". aoqi@0: *
aoqi@0: * If the primary type of this datasource is not "text", then if aoqi@0: * all the bytes of its input stream are US-ASCII, the encoding aoqi@0: * is "7bit". If there is even one non-US-ASCII character, the aoqi@0: * encoding is "base64". aoqi@0: *

aoqi@0: * aoqi@0: * @param ds DataSource aoqi@0: * @return the encoding. This is either "7bit", aoqi@0: * "quoted-printable" or "base64" aoqi@0: */ aoqi@0: public static String getEncoding(DataSource ds) { aoqi@0: ContentType cType = null; aoqi@0: InputStream is = null; aoqi@0: String encoding = null; aoqi@0: aoqi@0: try { aoqi@0: cType = new ContentType(ds.getContentType()); aoqi@0: is = ds.getInputStream(); aoqi@0: } catch (Exception ex) { aoqi@0: return "base64"; // what else ?! aoqi@0: } aoqi@0: aoqi@0: boolean isText = cType.match("text/*"); aoqi@0: // if not text, stop processing when we see non-ASCII aoqi@0: int i = checkAscii(is, ALL, !isText); aoqi@0: switch (i) { aoqi@0: case ALL_ASCII: aoqi@0: encoding = "7bit"; // all ascii aoqi@0: break; aoqi@0: case MOSTLY_ASCII: aoqi@0: encoding = "quoted-printable"; // mostly ascii aoqi@0: break; aoqi@0: default: aoqi@0: encoding = "base64"; // mostly binary aoqi@0: break; aoqi@0: } aoqi@0: aoqi@0: // Close the input stream aoqi@0: try { aoqi@0: is.close(); aoqi@0: } catch (IOException ioex) { } aoqi@0: aoqi@0: return encoding; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Same as getEncoding(DataSource) except that instead aoqi@0: * of reading the data from an InputStream it uses the aoqi@0: * writeTo method to examine the data. This is more aoqi@0: * efficient in the common case of a DataHandler aoqi@0: * created with an object and a MIME type (for example, a aoqi@0: * "text/plain" String) because all the I/O is done in this aoqi@0: * thread. In the case requiring an InputStream the aoqi@0: * DataHandler uses a thread, a pair of pipe streams, aoqi@0: * and the writeTo method to produce the data.

aoqi@0: * aoqi@0: * @since JavaMail 1.2 aoqi@0: */ aoqi@0: public static String getEncoding(DataHandler dh) { aoqi@0: ContentType cType = null; aoqi@0: String encoding = null; aoqi@0: aoqi@0: /* aoqi@0: * Try to pick the most efficient means of determining the aoqi@0: * encoding. If this DataHandler was created using a DataSource, aoqi@0: * the getEncoding(DataSource) method is typically faster. If aoqi@0: * the DataHandler was created with an object, this method is aoqi@0: * much faster. To distinguish the two cases, we use a heuristic. aoqi@0: * A DataHandler created with an object will always have a null name. aoqi@0: * A DataHandler created with a DataSource will usually have a aoqi@0: * non-null name. aoqi@0: * aoqi@0: * XXX - This is actually quite a disgusting hack, but it makes aoqi@0: * a common case run over twice as fast. aoqi@0: */ aoqi@0: if (dh.getName() != null) aoqi@0: return getEncoding(dh.getDataSource()); aoqi@0: aoqi@0: try { aoqi@0: cType = new ContentType(dh.getContentType()); aoqi@0: } catch (Exception ex) { aoqi@0: return "base64"; // what else ?! 
aoqi@0: } aoqi@0: aoqi@0: if (cType.match("text/*")) { aoqi@0: // Check all of the available bytes aoqi@0: AsciiOutputStream aos = new AsciiOutputStream(false, false); aoqi@0: try { aoqi@0: dh.writeTo(aos); aoqi@0: } catch (IOException ex) { } // ignore it aoqi@0: switch (aos.getAscii()) { aoqi@0: case ALL_ASCII: aoqi@0: encoding = "7bit"; // all ascii aoqi@0: break; aoqi@0: case MOSTLY_ASCII: aoqi@0: encoding = "quoted-printable"; // mostly ascii aoqi@0: break; aoqi@0: default: aoqi@0: encoding = "base64"; // mostly binary aoqi@0: break; aoqi@0: } aoqi@0: } else { // not "text" aoqi@0: // Check all of available bytes, break out if we find aoqi@0: // at least one non-US-ASCII character aoqi@0: AsciiOutputStream aos = aoqi@0: new AsciiOutputStream(true, encodeEolStrict); aoqi@0: try { aoqi@0: dh.writeTo(aos); aoqi@0: } catch (IOException ex) { } // ignore it aoqi@0: if (aos.getAscii() == ALL_ASCII) // all ascii aoqi@0: encoding = "7bit"; aoqi@0: else // found atleast one non-ascii character, use b64 aoqi@0: encoding = "base64"; aoqi@0: } aoqi@0: aoqi@0: return encoding; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Decode the given input stream. The Input stream returned is aoqi@0: * the decoded input stream. All the encodings defined in RFC 2045 aoqi@0: * are supported here. They include "base64", "quoted-printable", aoqi@0: * "7bit", "8bit", and "binary". In addition, "uuencode" is also aoqi@0: * supported. aoqi@0: * aoqi@0: * @param is input stream aoqi@0: * @param encoding the encoding of the stream. aoqi@0: * @return decoded input stream. 
aoqi@0: */ aoqi@0: public static InputStream decode(InputStream is, String encoding) aoqi@0: throws MessagingException { aoqi@0: if (encoding.equalsIgnoreCase("base64")) aoqi@0: return new BASE64DecoderStream(is); aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable")) aoqi@0: return new QPDecoderStream(is); aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uue")) aoqi@0: return new UUDecoderStream(is); aoqi@0: else if (encoding.equalsIgnoreCase("binary") || aoqi@0: encoding.equalsIgnoreCase("7bit") || aoqi@0: encoding.equalsIgnoreCase("8bit")) aoqi@0: return is; aoqi@0: else aoqi@0: throw new MessagingException("Unknown encoding: " + encoding); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Wrap an encoder around the given output stream. aoqi@0: * All the encodings defined in RFC 2045 are supported here. aoqi@0: * They include "base64", "quoted-printable", "7bit", "8bit" and aoqi@0: * "binary". In addition, "uuencode" is also supported. aoqi@0: * aoqi@0: * @param os output stream aoqi@0: * @param encoding the encoding of the stream. aoqi@0: * @return output stream that applies the aoqi@0: * specified encoding. 
aoqi@0: */ aoqi@0: public static OutputStream encode(OutputStream os, String encoding) aoqi@0: throws MessagingException { aoqi@0: if (encoding == null) aoqi@0: return os; aoqi@0: else if (encoding.equalsIgnoreCase("base64")) aoqi@0: return new BASE64EncoderStream(os); aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable")) aoqi@0: return new QPEncoderStream(os); aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uue")) aoqi@0: return new UUEncoderStream(os); aoqi@0: else if (encoding.equalsIgnoreCase("binary") || aoqi@0: encoding.equalsIgnoreCase("7bit") || aoqi@0: encoding.equalsIgnoreCase("8bit")) aoqi@0: return os; aoqi@0: else aoqi@0: throw new MessagingException("Unknown encoding: " +encoding); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Wrap an encoder around the given output stream. aoqi@0: * All the encodings defined in RFC 2045 are supported here. aoqi@0: * They include "base64", "quoted-printable", "7bit", "8bit" and aoqi@0: * "binary". In addition, "uuencode" is also supported. aoqi@0: * The filename parameter is used with the "uuencode" aoqi@0: * encoding and is included in the encoded output. aoqi@0: * aoqi@0: * @param os output stream aoqi@0: * @param encoding the encoding of the stream. aoqi@0: * @param filename name for the file being encoded (only used aoqi@0: * with uuencode) aoqi@0: * @return output stream that applies the aoqi@0: * specified encoding. 
aoqi@0: * @since JavaMail 1.2 aoqi@0: */ aoqi@0: public static OutputStream encode(OutputStream os, String encoding, aoqi@0: String filename) aoqi@0: throws MessagingException { aoqi@0: if (encoding == null) aoqi@0: return os; aoqi@0: else if (encoding.equalsIgnoreCase("base64")) aoqi@0: return new BASE64EncoderStream(os); aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable")) aoqi@0: return new QPEncoderStream(os); aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uuencode") || aoqi@0: encoding.equalsIgnoreCase("x-uue")) aoqi@0: return new UUEncoderStream(os, filename); aoqi@0: else if (encoding.equalsIgnoreCase("binary") || aoqi@0: encoding.equalsIgnoreCase("7bit") || aoqi@0: encoding.equalsIgnoreCase("8bit")) aoqi@0: return os; aoqi@0: else aoqi@0: throw new MessagingException("Unknown encoding: " +encoding); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "text" token into mail-safe form as per aoqi@0: * RFC 2047.

aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the platform's aoqi@0: * default charset, then transfer-encoded using either the B or aoqi@0: * Q encoding. The resulting bytes are then returned as a Unicode aoqi@0: * string containing only ASCII characters.

aoqi@0: * aoqi@0: * Note that this method should be used to encode only aoqi@0: * "unstructured" RFC 822 headers.

aoqi@0: * aoqi@0: * Example of usage: aoqi@0: *

aoqi@0:      *
aoqi@0:      *  MimeBodyPart part = ...
aoqi@0:      *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
aoqi@0:      *  try {
aoqi@0:      *    // If we know for sure that rawvalue contains only US-ASCII
aoqi@0:      *    // characters, we can skip the encoding part
aoqi@0:      *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
aoqi@0:      *  } catch (UnsupportedEncodingException e) {
aoqi@0:      *    // encoding failure
aoqi@0:      *  } catch (MessagingException me) {
aoqi@0:      *   // setHeader() failure
aoqi@0:      *  }
aoqi@0:      *
aoqi@0:      *

aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeText(String text) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeText(text, null, null); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "text" token into mail-safe form as per aoqi@0: * RFC 2047.

aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the specified aoqi@0: * charset, then transfer-encoded using either the B or Q encoding. aoqi@0: * The resulting bytes are then returned as a Unicode string aoqi@0: * containing only ASCII characters.

aoqi@0: * aoqi@0: * Note that this method should be used to encode only aoqi@0: * "unstructured" RFC 822 headers. aoqi@0: * aoqi@0: * @param text the header value aoqi@0: * @param charset the charset. If this parameter is null, the aoqi@0: * platform's default chatset is used. aoqi@0: * @param encoding the encoding to be used. Currently supported aoqi@0: * values are "B" and "Q". If this parameter is null, then aoqi@0: * the "Q" encoding is used if most of characters to be aoqi@0: * encoded are in the ASCII charset, otherwise "B" encoding aoqi@0: * is used. aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: */ aoqi@0: public static String encodeText(String text, String charset, aoqi@0: String encoding) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(text, charset, encoding, false); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Decode "unstructured" headers, that is, headers that are defined aoqi@0: * as '*text' as per RFC 822.

aoqi@0: * aoqi@0: * The string is decoded using the algorithm specified in aoqi@0: * RFC 2047, Section 6.1.1. If the charset-conversion fails aoqi@0: * for any sequence, an UnsupportedEncodingException is thrown. aoqi@0: * If the String is not an RFC 2047 style encoded header, it is aoqi@0: * returned as-is

aoqi@0: * aoqi@0: * Example of usage: aoqi@0: *

aoqi@0:      *
aoqi@0:      *  MimeBodyPart part = ...
aoqi@0:      *  String rawvalue = null;
aoqi@0:      *  String  value = null;
aoqi@0:      *  try {
aoqi@0:      *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
aoqi@0:      *      value = MimeUtility.decodeText(rawvalue);
aoqi@0:      *  } catch (UnsupportedEncodingException e) {
aoqi@0:      *      // Don't care
aoqi@0:      *      value = rawvalue;
aoqi@0:      *  } catch (MessagingException me) { }
aoqi@0:      *
aoqi@0:      *  return value;
aoqi@0:      *
aoqi@0:      *

aoqi@0: * aoqi@0: * @param etext the possibly encoded value aoqi@0: * @exception UnsupportedEncodingException if the charset aoqi@0: * conversion failed. aoqi@0: */ aoqi@0: public static String decodeText(String etext) aoqi@0: throws UnsupportedEncodingException { aoqi@0: /* aoqi@0: * We look for sequences separated by "linear-white-space". aoqi@0: * (as per RFC 2047, Section 6.1.1) aoqi@0: * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL. aoqi@0: */ aoqi@0: String lwsp = " \t\n\r"; aoqi@0: StringTokenizer st; aoqi@0: aoqi@0: /* aoqi@0: * First, lets do a quick run thru the string and check aoqi@0: * whether the sequence "=?" exists at all. If none exists, aoqi@0: * we know there are no encoded-words in here and we can just aoqi@0: * return the string as-is, without suffering thru the later aoqi@0: * decoding logic. aoqi@0: * This handles the most common case of unencoded headers aoqi@0: * efficiently. aoqi@0: */ aoqi@0: if (etext.indexOf("=?") == -1) aoqi@0: return etext; aoqi@0: aoqi@0: // Encoded words found. Start decoding ... aoqi@0: aoqi@0: st = new StringTokenizer(etext, lwsp, true); aoqi@0: StringBuffer sb = new StringBuffer(); // decode buffer aoqi@0: StringBuffer wsb = new StringBuffer(); // white space buffer aoqi@0: boolean prevWasEncoded = false; aoqi@0: aoqi@0: while (st.hasMoreTokens()) { aoqi@0: char c; aoqi@0: String s = st.nextToken(); aoqi@0: // If whitespace, append it to the whitespace buffer aoqi@0: if (((c = s.charAt(0)) == ' ') || (c == '\t') || aoqi@0: (c == '\r') || (c == '\n')) aoqi@0: wsb.append(c); aoqi@0: else { aoqi@0: // Check if token is an 'encoded-word' .. aoqi@0: String word; aoqi@0: try { aoqi@0: word = decodeWord(s); aoqi@0: // Yes, this IS an 'encoded-word'. aoqi@0: if (!prevWasEncoded && wsb.length() > 0) { aoqi@0: // if the previous word was also encoded, we aoqi@0: // should ignore the collected whitespace. Else aoqi@0: // we include the whitespace as well. 
aoqi@0: sb.append(wsb); aoqi@0: } aoqi@0: prevWasEncoded = true; aoqi@0: } catch (ParseException pex) { aoqi@0: // This is NOT an 'encoded-word'. aoqi@0: word = s; aoqi@0: // possibly decode inner encoded words aoqi@0: if (!decodeStrict) aoqi@0: word = decodeInnerWords(word); aoqi@0: // include colleced whitespace .. aoqi@0: if (wsb.length() > 0) aoqi@0: sb.append(wsb); aoqi@0: prevWasEncoded = false; aoqi@0: } aoqi@0: sb.append(word); // append the actual word aoqi@0: wsb.setLength(0); // reset wsb for reuse aoqi@0: } aoqi@0: } aoqi@0: return sb.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "word" token into mail-safe form as per aoqi@0: * RFC 2047.

aoqi@0: * aoqi@0: * This method is meant to be used when creating RFC 822 "phrases". aoqi@0: * The InternetAddress class, for example, uses this to encode aoqi@0: * it's 'phrase' component. aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @return Array of Unicode strings containing only US-ASCII aoqi@0: * characters. aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeWord(String word) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(word, null, null); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "word" token into mail-safe form as per aoqi@0: * RFC 2047.

aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the specified aoqi@0: * charset, then transfer-encoded using either the B or Q encoding. aoqi@0: * The resulting bytes are then returned as a Unicode string aoqi@0: * containing only ASCII characters.

aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @param charset the MIME charset aoqi@0: * @param encoding the encoding to be used. Currently supported aoqi@0: * values are "B" and "Q". If this parameter is null, then aoqi@0: * the "Q" encoding is used if most of characters to be aoqi@0: * encoded are in the ASCII charset, otherwise "B" encoding aoqi@0: * is used. aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeWord(String word, String charset, aoqi@0: String encoding) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(word, charset, encoding, true); aoqi@0: } aoqi@0: aoqi@0: /* aoqi@0: * Encode the given string. The parameter 'encodingWord' should aoqi@0: * be true if a RFC 822 "word" token is being encoded and false if a aoqi@0: * RFC 822 "text" token is being encoded. This is because the aoqi@0: * "Q" encoding defined in RFC 2047 has more restrictions when aoqi@0: * encoding "word" tokens. (Sigh) aoqi@0: */ aoqi@0: private static String encodeWord(String string, String charset, aoqi@0: String encoding, boolean encodingWord) aoqi@0: throws UnsupportedEncodingException { aoqi@0: aoqi@0: // If 'string' contains only US-ASCII characters, just aoqi@0: // return it. aoqi@0: int ascii = checkAscii(string); aoqi@0: if (ascii == ALL_ASCII) aoqi@0: return string; aoqi@0: aoqi@0: // Else, apply the specified charset conversion. aoqi@0: String jcharset; aoqi@0: if (charset == null) { // use default charset aoqi@0: jcharset = getDefaultJavaCharset(); // the java charset aoqi@0: charset = getDefaultMIMECharset(); // the MIME equivalent aoqi@0: } else // MIME charset -> java charset aoqi@0: jcharset = javaCharset(charset); aoqi@0: aoqi@0: // If no transfer-encoding is specified, figure one out. 
aoqi@0: if (encoding == null) { aoqi@0: if (ascii != MOSTLY_NONASCII) aoqi@0: encoding = "Q"; aoqi@0: else aoqi@0: encoding = "B"; aoqi@0: } aoqi@0: aoqi@0: boolean b64; aoqi@0: if (encoding.equalsIgnoreCase("B")) aoqi@0: b64 = true; aoqi@0: else if (encoding.equalsIgnoreCase("Q")) aoqi@0: b64 = false; aoqi@0: else aoqi@0: throw new UnsupportedEncodingException( aoqi@0: "Unknown transfer encoding: " + encoding); aoqi@0: aoqi@0: StringBuffer outb = new StringBuffer(); // the output buffer aoqi@0: doEncode(string, b64, jcharset, aoqi@0: // As per RFC 2047, size of an encoded string should not aoqi@0: // exceed 75 bytes. aoqi@0: // 7 = size of "=?", '?', 'B'/'Q', '?', "?=" aoqi@0: 75 - 7 - charset.length(), // the available space aoqi@0: "=?" + charset + "?" + encoding + "?", // prefix aoqi@0: true, encodingWord, outb); aoqi@0: aoqi@0: return outb.toString(); aoqi@0: } aoqi@0: aoqi@0: private static void doEncode(String string, boolean b64, aoqi@0: String jcharset, int avail, String prefix, aoqi@0: boolean first, boolean encodingWord, StringBuffer buf) aoqi@0: throws UnsupportedEncodingException { aoqi@0: aoqi@0: // First find out what the length of the encoded version of aoqi@0: // 'string' would be. aoqi@0: byte[] bytes = string.getBytes(jcharset); aoqi@0: int len; aoqi@0: if (b64) // "B" encoding aoqi@0: len = BEncoderStream.encodedLength(bytes); aoqi@0: else // "Q" aoqi@0: len = QEncoderStream.encodedLength(bytes, encodingWord); aoqi@0: aoqi@0: int size; aoqi@0: if ((len > avail) && ((size = string.length()) > 1)) { aoqi@0: // If the length is greater than 'avail', split 'string' aoqi@0: // into two and recurse. aoqi@0: doEncode(string.substring(0, size/2), b64, jcharset, aoqi@0: avail, prefix, first, encodingWord, buf); aoqi@0: doEncode(string.substring(size/2, size), b64, jcharset, aoqi@0: avail, prefix, false, encodingWord, buf); aoqi@0: } else { aoqi@0: // length <= than 'avail'. 
Encode the given string aoqi@0: ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE); aoqi@0: OutputStream eos; // the encoder aoqi@0: if (b64) // "B" encoding aoqi@0: eos = new BEncoderStream(os); aoqi@0: else // "Q" encoding aoqi@0: eos = new QEncoderStream(os, encodingWord); aoqi@0: aoqi@0: try { // do the encoding aoqi@0: eos.write(bytes); aoqi@0: eos.close(); aoqi@0: } catch (IOException ioex) { } aoqi@0: aoqi@0: byte[] encodedBytes = os.toByteArray(); // the encoded stuff aoqi@0: // Now write out the encoded (all ASCII) bytes into our aoqi@0: // StringBuffer aoqi@0: if (!first) // not the first line of this sequence aoqi@0: if (foldEncodedWords) aoqi@0: buf.append("\r\n "); // start a continuation line aoqi@0: else aoqi@0: buf.append(" "); // line will be folded later aoqi@0: aoqi@0: buf.append(prefix); aoqi@0: for (int i = 0; i < encodedBytes.length; i++) aoqi@0: buf.append((char)encodedBytes[i]); aoqi@0: buf.append("?="); // terminate the current sequence aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * The string is parsed using the rules in RFC 2047 for parsing aoqi@0: * an "encoded-word". If the parse fails, a ParseException is aoqi@0: * thrown. Otherwise, it is transfer-decoded, and then aoqi@0: * charset-converted into Unicode. If the charset-conversion aoqi@0: * fails, an UnsupportedEncodingException is thrown.

aoqi@0: * aoqi@0: * @param eword the possibly encoded value aoqi@0: * @exception ParseException if the string is not an aoqi@0: * encoded-word as per RFC 2047. aoqi@0: * @exception UnsupportedEncodingException if the charset aoqi@0: * conversion failed. aoqi@0: */ aoqi@0: public static String decodeWord(String eword) aoqi@0: throws ParseException, UnsupportedEncodingException { aoqi@0: aoqi@0: if (!eword.startsWith("=?")) // not an encoded word aoqi@0: throw new ParseException(); aoqi@0: aoqi@0: // get charset aoqi@0: int start = 2; int pos; aoqi@0: if ((pos = eword.indexOf('?', start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String charset = javaCharset(eword.substring(start, pos)); aoqi@0: aoqi@0: // get encoding aoqi@0: start = pos+1; aoqi@0: if ((pos = eword.indexOf('?', start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String encoding = eword.substring(start, pos); aoqi@0: aoqi@0: // get encoded-sequence aoqi@0: start = pos+1; aoqi@0: if ((pos = eword.indexOf("?=", start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String word = eword.substring(start, pos); aoqi@0: aoqi@0: try { aoqi@0: // Extract the bytes from word aoqi@0: ByteArrayInputStream bis = aoqi@0: new ByteArrayInputStream(ASCIIUtility.getBytes(word)); aoqi@0: aoqi@0: // Get the appropriate decoder aoqi@0: InputStream is; aoqi@0: if (encoding.equalsIgnoreCase("B")) aoqi@0: is = new BASE64DecoderStream(bis); aoqi@0: else if (encoding.equalsIgnoreCase("Q")) aoqi@0: is = new QDecoderStream(bis); aoqi@0: else aoqi@0: throw new UnsupportedEncodingException( aoqi@0: "unknown encoding: " + encoding); aoqi@0: aoqi@0: // For b64 & q, size of decoded word <= size of word. So aoqi@0: // the decoded bytes must fit into the 'bytes' array. This aoqi@0: // is certainly more efficient than writing bytes into a aoqi@0: // ByteArrayOutputStream and then pulling out the byte[] aoqi@0: // from it. 
aoqi@0: int count = bis.available(); aoqi@0: byte[] bytes = new byte[count]; aoqi@0: // count is set to the actual number of decoded bytes aoqi@0: count = is.read(bytes, 0, count); aoqi@0: aoqi@0: // Finally, convert the decoded bytes into a String using aoqi@0: // the specified charset aoqi@0: String s = new String(bytes, 0, count, charset); aoqi@0: if (pos + 2 < eword.length()) { aoqi@0: // there's still more text in the string aoqi@0: String rest = eword.substring(pos + 2); aoqi@0: if (!decodeStrict) aoqi@0: rest = decodeInnerWords(rest); aoqi@0: s += rest; aoqi@0: } aoqi@0: return s; aoqi@0: } catch (UnsupportedEncodingException uex) { aoqi@0: // explicitly catch and rethrow this exception, otherwise aoqi@0: // the below IOException catch will swallow this up! aoqi@0: throw uex; aoqi@0: } catch (IOException ioex) { aoqi@0: // Shouldn't happen. aoqi@0: throw new ParseException(); aoqi@0: } catch (IllegalArgumentException iex) { aoqi@0: /* An unknown charset of the form ISO-XXX-XXX, will cause aoqi@0: * the JDK to throw an IllegalArgumentException ... Since the aoqi@0: * JDK will attempt to create a classname using this string, aoqi@0: * but valid classnames must not contain the character '-', aoqi@0: * and this results in an IllegalArgumentException, rather than aoqi@0: * the expected UnsupportedEncodingException. Yikes aoqi@0: */ aoqi@0: throw new UnsupportedEncodingException(); aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Look for encoded words within a word. The MIME spec doesn't aoqi@0: * allow this, but many broken mailers, especially Japanese mailers, aoqi@0: * produce such incorrect encodings. 
aoqi@0: */ aoqi@0: private static String decodeInnerWords(String word) aoqi@0: throws UnsupportedEncodingException { aoqi@0: int start = 0, i; aoqi@0: StringBuffer buf = new StringBuffer(); aoqi@0: while ((i = word.indexOf("=?", start)) >= 0) { aoqi@0: buf.append(word.substring(start, i)); aoqi@0: int end = word.indexOf("?=", i); aoqi@0: if (end < 0) aoqi@0: break; aoqi@0: String s = word.substring(i, end + 2); aoqi@0: try { aoqi@0: s = decodeWord(s); aoqi@0: } catch (ParseException pex) { aoqi@0: // ignore it, just use the original string aoqi@0: } aoqi@0: buf.append(s); aoqi@0: start = end + 2; aoqi@0: } aoqi@0: if (start == 0) aoqi@0: return word; aoqi@0: if (start < word.length()) aoqi@0: buf.append(word.substring(start)); aoqi@0: return buf.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * A utility method to quote a word, if the word contains any aoqi@0: * characters from the specified 'specials' list.

aoqi@0: * aoqi@0: * The HeaderTokenizer class defines two special aoqi@0: * sets of delimiters - MIME and RFC 822.

aoqi@0: * aoqi@0: * This method is typically used during the generation of aoqi@0: * RFC 822 and MIME header fields. aoqi@0: * aoqi@0: * @param word word to be quoted aoqi@0: * @param specials the set of special characters aoqi@0: * @return the possibly quoted word aoqi@0: * @see javax.mail.internet.HeaderTokenizer#MIME aoqi@0: * @see javax.mail.internet.HeaderTokenizer#RFC822 aoqi@0: */ aoqi@0: public static String quote(String word, String specials) { aoqi@0: int len = word.length(); aoqi@0: aoqi@0: /* aoqi@0: * Look for any "bad" characters, Escape and aoqi@0: * quote the entire string if necessary. aoqi@0: */ aoqi@0: boolean needQuoting = false; aoqi@0: for (int i = 0; i < len; i++) { aoqi@0: char c = word.charAt(i); aoqi@0: if (c == '"' || c == '\\' || c == '\r' || c == '\n') { aoqi@0: // need to escape them and then quote the whole string aoqi@0: StringBuffer sb = new StringBuffer(len + 3); aoqi@0: sb.append('"'); aoqi@0: sb.append(word.substring(0, i)); aoqi@0: int lastc = 0; aoqi@0: for (int j = i; j < len; j++) { aoqi@0: char cc = word.charAt(j); aoqi@0: if ((cc == '"') || (cc == '\\') || aoqi@0: (cc == '\r') || (cc == '\n')) aoqi@0: if (cc == '\n' && lastc == '\r') aoqi@0: ; // do nothing, CR was already escaped aoqi@0: else aoqi@0: sb.append('\\'); // Escape the character aoqi@0: sb.append(cc); aoqi@0: lastc = cc; aoqi@0: } aoqi@0: sb.append('"'); aoqi@0: return sb.toString(); aoqi@0: } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0) aoqi@0: // These characters cause the string to be quoted aoqi@0: needQuoting = true; aoqi@0: } aoqi@0: aoqi@0: if (needQuoting) { aoqi@0: StringBuffer sb = new StringBuffer(len + 2); aoqi@0: sb.append('"').append(word).append('"'); aoqi@0: return sb.toString(); aoqi@0: } else aoqi@0: return word; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Fold a string at linear whitespace so that each line is no longer aoqi@0: * than 76 characters, if possible. 
If there are more than 76 aoqi@0: * non-whitespace characters consecutively, the string is folded at aoqi@0: * the first whitespace after that sequence. The parameter aoqi@0: * used indicates how many characters have been used in aoqi@0: * the current line; it is usually the length of the header name.

aoqi@0: * aoqi@0: * Note that line breaks in the string aren't escaped; they probably aoqi@0: * should be. aoqi@0: * aoqi@0: * @param used characters used in line so far aoqi@0: * @param s the string to fold aoqi@0: * @return the folded string aoqi@0: */ aoqi@0: /*public*/ static String fold(int used, String s) { aoqi@0: if (!foldText) aoqi@0: return s; aoqi@0: aoqi@0: int end; aoqi@0: char c; aoqi@0: // Strip trailing spaces aoqi@0: for (end = s.length() - 1; end >= 0; end--) { aoqi@0: c = s.charAt(end); aoqi@0: if (c != ' ' && c != '\t') aoqi@0: break; aoqi@0: } aoqi@0: if (end != s.length() - 1) aoqi@0: s = s.substring(0, end + 1); aoqi@0: aoqi@0: // if the string fits now, just return it aoqi@0: if (used + s.length() <= 76) aoqi@0: return s; aoqi@0: aoqi@0: // have to actually fold the string aoqi@0: StringBuffer sb = new StringBuffer(s.length() + 4); aoqi@0: char lastc = 0; aoqi@0: while (used + s.length() > 76) { aoqi@0: int lastspace = -1; aoqi@0: for (int i = 0; i < s.length(); i++) { aoqi@0: if (lastspace != -1 && used + i > 76) aoqi@0: break; aoqi@0: c = s.charAt(i); aoqi@0: if (c == ' ' || c == '\t') aoqi@0: if (!(lastc == ' ' || lastc == '\t')) aoqi@0: lastspace = i; aoqi@0: lastc = c; aoqi@0: } aoqi@0: if (lastspace == -1) { aoqi@0: // no space, use the whole thing aoqi@0: sb.append(s); aoqi@0: s = ""; aoqi@0: used = 0; aoqi@0: break; aoqi@0: } aoqi@0: sb.append(s.substring(0, lastspace)); aoqi@0: sb.append("\r\n"); aoqi@0: lastc = s.charAt(lastspace); aoqi@0: sb.append(lastc); aoqi@0: s = s.substring(lastspace + 1); aoqi@0: used = 1; aoqi@0: } aoqi@0: sb.append(s); aoqi@0: return sb.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Unfold a folded header. Any line breaks that aren't escaped and aoqi@0: * are followed by whitespace are removed. 
aoqi@0: * aoqi@0: * @param s the string to unfold aoqi@0: * @return the unfolded string aoqi@0: */ aoqi@0: /*public*/ static String unfold(String s) { aoqi@0: if (!foldText) aoqi@0: return s; aoqi@0: aoqi@0: StringBuffer sb = null; aoqi@0: int i; aoqi@0: while ((i = indexOfAny(s, "\r\n")) >= 0) { aoqi@0: int start = i; aoqi@0: int l = s.length(); aoqi@0: i++; // skip CR or NL aoqi@0: if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n') aoqi@0: i++; // skip LF aoqi@0: if (start == 0 || s.charAt(start - 1) != '\\') { aoqi@0: char c; aoqi@0: // if next line starts with whitespace, skip all of it aoqi@0: // XXX - always has to be true? aoqi@0: if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) { aoqi@0: i++; // skip whitespace aoqi@0: while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) aoqi@0: i++; aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: if (start != 0) { aoqi@0: sb.append(s.substring(0, start)); aoqi@0: sb.append(' '); aoqi@0: } aoqi@0: s = s.substring(i); aoqi@0: continue; aoqi@0: } aoqi@0: // it's not a continuation line, just leave it in aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: sb.append(s.substring(0, i)); aoqi@0: s = s.substring(i); aoqi@0: } else { aoqi@0: // there's a backslash at "start - 1" aoqi@0: // strip it out, but leave in the line break aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: sb.append(s.substring(0, start - 1)); aoqi@0: sb.append(s.substring(start, i)); aoqi@0: s = s.substring(i); aoqi@0: } aoqi@0: } aoqi@0: if (sb != null) { aoqi@0: sb.append(s); aoqi@0: return sb.toString(); aoqi@0: } else aoqi@0: return s; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Return the first index of any of the characters in "any" in "s", aoqi@0: * or -1 if none are found. aoqi@0: * aoqi@0: * This should be a method on String. 
aoqi@0: */ aoqi@0: private static int indexOfAny(String s, String any) { aoqi@0: return indexOfAny(s, any, 0); aoqi@0: } aoqi@0: aoqi@0: private static int indexOfAny(String s, String any, int start) { aoqi@0: try { aoqi@0: int len = s.length(); aoqi@0: for (int i = start; i < len; i++) { aoqi@0: if (any.indexOf(s.charAt(i)) >= 0) aoqi@0: return i; aoqi@0: } aoqi@0: return -1; aoqi@0: } catch (StringIndexOutOfBoundsException e) { aoqi@0: return -1; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Convert a MIME charset name into a valid Java charset name.

aoqi@0: * aoqi@0: * @param charset the MIME charset name aoqi@0: * @return the Java charset equivalent. If a suitable mapping is aoqi@0: * not available, the passed in charset is itself returned. aoqi@0: */ aoqi@0: public static String javaCharset(String charset) { aoqi@0: if (mime2java == null || charset == null) aoqi@0: // no mapping table, or charset parameter is null aoqi@0: return charset; aoqi@0: aoqi@0: String alias = (String)mime2java.get(charset.toLowerCase()); aoqi@0: return alias == null ? charset : alias; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Convert a java charset into its MIME charset name.

aoqi@0: * aoqi@0: * Note that a future version of JDK (post 1.2) might provide aoqi@0: * this functionality, in which case, we may deprecate this aoqi@0: * method then. aoqi@0: * aoqi@0: * @param charset the JDK charset aoqi@0: * @return the MIME/IANA equivalent. If a mapping aoqi@0: * is not possible, the passed in charset itself aoqi@0: * is returned. aoqi@0: * @since JavaMail 1.1 aoqi@0: */ aoqi@0: public static String mimeCharset(String charset) { aoqi@0: if (java2mime == null || charset == null) aoqi@0: // no mapping table or charset param is null aoqi@0: return charset; aoqi@0: aoqi@0: String alias = (String)java2mime.get(charset.toLowerCase()); aoqi@0: return alias == null ? charset : alias; aoqi@0: } aoqi@0: aoqi@0: private static String defaultJavaCharset; aoqi@0: private static String defaultMIMECharset; aoqi@0: aoqi@0: /** aoqi@0: * Get the default charset corresponding to the system's current aoqi@0: * default locale. If the System property mail.mime.charset aoqi@0: * is set, a system charset corresponding to this MIME charset will be aoqi@0: * returned.

aoqi@0: * aoqi@0: * @return the default charset of the system's default locale, aoqi@0: * as a Java charset. (NOT a MIME charset) aoqi@0: * @since JavaMail 1.1 aoqi@0: */ aoqi@0: public static String getDefaultJavaCharset() { aoqi@0: if (defaultJavaCharset == null) { aoqi@0: /* aoqi@0: * If mail.mime.charset is set, it controls the default aoqi@0: * Java charset as well. aoqi@0: */ aoqi@0: String mimecs = null; aoqi@0: aoqi@0: mimecs = SAAJUtil.getSystemProperty("mail.mime.charset"); aoqi@0: aoqi@0: if (mimecs != null && mimecs.length() > 0) { aoqi@0: defaultJavaCharset = javaCharset(mimecs); aoqi@0: return defaultJavaCharset; aoqi@0: } aoqi@0: aoqi@0: try { aoqi@0: defaultJavaCharset = System.getProperty("file.encoding", aoqi@0: "8859_1"); aoqi@0: } catch (SecurityException sex) { aoqi@0: aoqi@0: class NullInputStream extends InputStream { aoqi@0: public int read() { aoqi@0: return 0; aoqi@0: } aoqi@0: } aoqi@0: InputStreamReader reader = aoqi@0: new InputStreamReader(new NullInputStream()); aoqi@0: defaultJavaCharset = reader.getEncoding(); aoqi@0: if (defaultJavaCharset == null) aoqi@0: defaultJavaCharset = "8859_1"; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: return defaultJavaCharset; aoqi@0: } aoqi@0: aoqi@0: /* aoqi@0: * Get the default MIME charset for this locale. aoqi@0: */ aoqi@0: static String getDefaultMIMECharset() { aoqi@0: if (defaultMIMECharset == null) { aoqi@0: defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset"); aoqi@0: } aoqi@0: if (defaultMIMECharset == null) aoqi@0: defaultMIMECharset = mimeCharset(getDefaultJavaCharset()); aoqi@0: return defaultMIMECharset; aoqi@0: } aoqi@0: aoqi@0: // Tables to map MIME charset names to Java names and vice versa. 
aoqi@0: // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset aoqi@0: private static Hashtable mime2java; aoqi@0: private static Hashtable java2mime; aoqi@0: aoqi@0: static { aoqi@0: java2mime = new Hashtable(40); aoqi@0: mime2java = new Hashtable(10); aoqi@0: aoqi@0: try { aoqi@0: // Use this class's classloader to load the mapping file aoqi@0: // XXX - we should use SecuritySupport, but it's in another package aoqi@0: InputStream is = aoqi@0: com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream( aoqi@0: "/META-INF/javamail.charset.map"); aoqi@0: aoqi@0: if (is != null) { aoqi@0: is = new LineInputStream(is); aoqi@0: aoqi@0: // Load the JDK-to-MIME charset mapping table aoqi@0: loadMappings((LineInputStream)is, java2mime); aoqi@0: aoqi@0: // Load the MIME-to-JDK charset mapping table aoqi@0: loadMappings((LineInputStream)is, mime2java); aoqi@0: } aoqi@0: } catch (Exception ex) { } aoqi@0: aoqi@0: // If we didn't load the tables, e.g., because we didn't have aoqi@0: // permission, load them manually. The entries here should be aoqi@0: // the same as the default javamail.charset.map. 
aoqi@0: if (java2mime.isEmpty()) { aoqi@0: java2mime.put("8859_1", "ISO-8859-1"); aoqi@0: java2mime.put("iso8859_1", "ISO-8859-1"); aoqi@0: java2mime.put("ISO8859-1", "ISO-8859-1"); aoqi@0: aoqi@0: java2mime.put("8859_2", "ISO-8859-2"); aoqi@0: java2mime.put("iso8859_2", "ISO-8859-2"); aoqi@0: java2mime.put("ISO8859-2", "ISO-8859-2"); aoqi@0: aoqi@0: java2mime.put("8859_3", "ISO-8859-3"); aoqi@0: java2mime.put("iso8859_3", "ISO-8859-3"); aoqi@0: java2mime.put("ISO8859-3", "ISO-8859-3"); aoqi@0: aoqi@0: java2mime.put("8859_4", "ISO-8859-4"); aoqi@0: java2mime.put("iso8859_4", "ISO-8859-4"); aoqi@0: java2mime.put("ISO8859-4", "ISO-8859-4"); aoqi@0: aoqi@0: java2mime.put("8859_5", "ISO-8859-5"); aoqi@0: java2mime.put("iso8859_5", "ISO-8859-5"); aoqi@0: java2mime.put("ISO8859-5", "ISO-8859-5"); aoqi@0: aoqi@0: java2mime.put("8859_6", "ISO-8859-6"); aoqi@0: java2mime.put("iso8859_6", "ISO-8859-6"); aoqi@0: java2mime.put("ISO8859-6", "ISO-8859-6"); aoqi@0: aoqi@0: java2mime.put("8859_7", "ISO-8859-7"); aoqi@0: java2mime.put("iso8859_7", "ISO-8859-7"); aoqi@0: java2mime.put("ISO8859-7", "ISO-8859-7"); aoqi@0: aoqi@0: java2mime.put("8859_8", "ISO-8859-8"); aoqi@0: java2mime.put("iso8859_8", "ISO-8859-8"); aoqi@0: java2mime.put("ISO8859-8", "ISO-8859-8"); aoqi@0: aoqi@0: java2mime.put("8859_9", "ISO-8859-9"); aoqi@0: java2mime.put("iso8859_9", "ISO-8859-9"); aoqi@0: java2mime.put("ISO8859-9", "ISO-8859-9"); aoqi@0: aoqi@0: java2mime.put("SJIS", "Shift_JIS"); aoqi@0: java2mime.put("MS932", "Shift_JIS"); aoqi@0: java2mime.put("JIS", "ISO-2022-JP"); aoqi@0: java2mime.put("ISO2022JP", "ISO-2022-JP"); aoqi@0: java2mime.put("EUC_JP", "euc-jp"); aoqi@0: java2mime.put("KOI8_R", "koi8-r"); aoqi@0: java2mime.put("EUC_CN", "euc-cn"); aoqi@0: java2mime.put("EUC_TW", "euc-tw"); aoqi@0: java2mime.put("EUC_KR", "euc-kr"); aoqi@0: } aoqi@0: if (mime2java.isEmpty()) { aoqi@0: mime2java.put("iso-2022-cn", "ISO2022CN"); aoqi@0: mime2java.put("iso-2022-kr", "ISO2022KR"); aoqi@0: 
mime2java.put("utf-8", "UTF8"); aoqi@0: mime2java.put("utf8", "UTF8"); aoqi@0: mime2java.put("ja_jp.iso2022-7", "ISO2022JP"); aoqi@0: mime2java.put("ja_jp.eucjp", "EUCJIS"); aoqi@0: mime2java.put("euc-kr", "KSC5601"); aoqi@0: mime2java.put("euckr", "KSC5601"); aoqi@0: mime2java.put("us-ascii", "ISO-8859-1"); aoqi@0: mime2java.put("x-us-ascii", "ISO-8859-1"); aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: private static void loadMappings(LineInputStream is, Hashtable table) { aoqi@0: String currLine; aoqi@0: aoqi@0: while (true) { aoqi@0: try { aoqi@0: currLine = is.readLine(); aoqi@0: } catch (IOException ioex) { aoqi@0: break; // error in reading, stop aoqi@0: } aoqi@0: aoqi@0: if (currLine == null) // end of file, stop aoqi@0: break; aoqi@0: if (currLine.startsWith("--") && currLine.endsWith("--")) aoqi@0: // end of this table aoqi@0: break; aoqi@0: aoqi@0: // ignore empty lines and comments aoqi@0: if (currLine.trim().length() == 0 || currLine.startsWith("#")) aoqi@0: continue; aoqi@0: aoqi@0: // A valid entry is of the form aoqi@0: // where, := SPACE | HT. Parse this aoqi@0: StringTokenizer tk = new StringTokenizer(currLine, " \t"); aoqi@0: try { aoqi@0: String key = tk.nextToken(); aoqi@0: String value = tk.nextToken(); aoqi@0: table.put(key.toLowerCase(), value); aoqi@0: } catch (NoSuchElementException nex) { } aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: static final int ALL_ASCII = 1; aoqi@0: static final int MOSTLY_ASCII = 2; aoqi@0: static final int MOSTLY_NONASCII = 3; aoqi@0: aoqi@0: /** aoqi@0: * Check if the given string contains non US-ASCII characters. aoqi@0: * @param s string aoqi@0: * @return ALL_ASCII if all characters in the string aoqi@0: * belong to the US-ASCII charset. MOSTLY_ASCII aoqi@0: * if more than half of the available characters aoqi@0: * are US-ASCII characters. Else MOSTLY_NONASCII. 
aoqi@0: */ aoqi@0: static int checkAscii(String s) { aoqi@0: int ascii = 0, non_ascii = 0; aoqi@0: int l = s.length(); aoqi@0: aoqi@0: for (int i = 0; i < l; i++) { aoqi@0: if (nonascii((int)s.charAt(i))) // non-ascii aoqi@0: non_ascii++; aoqi@0: else aoqi@0: ascii++; aoqi@0: } aoqi@0: aoqi@0: if (non_ascii == 0) aoqi@0: return ALL_ASCII; aoqi@0: if (ascii > non_ascii) aoqi@0: return MOSTLY_ASCII; aoqi@0: aoqi@0: return MOSTLY_NONASCII; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Check if the given byte array contains non US-ASCII characters. aoqi@0: * @param b byte array aoqi@0: * @return ALL_ASCII if all characters in the string aoqi@0: * belong to the US-ASCII charset. MOSTLY_ASCII aoqi@0: * if more than half of the available characters aoqi@0: * are US-ASCII characters. Else MOSTLY_NONASCII. aoqi@0: * aoqi@0: * XXX - this method is no longer used aoqi@0: */ aoqi@0: static int checkAscii(byte[] b) { aoqi@0: int ascii = 0, non_ascii = 0; aoqi@0: aoqi@0: for (int i=0; i < b.length; i++) { aoqi@0: // The '&' operator automatically causes b[i] to be promoted aoqi@0: // to an int, and we mask out the higher bytes in the int aoqi@0: // so that the resulting value is not a negative integer. aoqi@0: if (nonascii(b[i] & 0xff)) // non-ascii aoqi@0: non_ascii++; aoqi@0: else aoqi@0: ascii++; aoqi@0: } aoqi@0: aoqi@0: if (non_ascii == 0) aoqi@0: return ALL_ASCII; aoqi@0: if (ascii > non_ascii) aoqi@0: return MOSTLY_ASCII; aoqi@0: aoqi@0: return MOSTLY_NONASCII; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Check if the given input stream contains non US-ASCII characters. aoqi@0: * Upto max bytes are checked. If max is aoqi@0: * set to ALL, then all the bytes available in this aoqi@0: * input stream are checked. If breakOnNonAscii is true aoqi@0: * the check terminates when the first non-US-ASCII character is aoqi@0: * found and MOSTLY_NONASCII is returned. Else, the check continues aoqi@0: * till max bytes or till the end of stream. 
aoqi@0: * aoqi@0: * @param is the input stream aoqi@0: * @param max maximum bytes to check for. The special value aoqi@0: * ALL indicates that all the bytes in this input aoqi@0: * stream must be checked. aoqi@0: * @param breakOnNonAscii if true, then terminate the aoqi@0: * the check when the first non-US-ASCII character aoqi@0: * is found. aoqi@0: * @return ALL_ASCII if all characters in the string aoqi@0: * belong to the US-ASCII charset. MOSTLY_ASCII aoqi@0: * if more than half of the available characters aoqi@0: * are US-ASCII characters. Else MOSTLY_NONASCII. aoqi@0: */ aoqi@0: static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) { aoqi@0: int ascii = 0, non_ascii = 0; aoqi@0: int len; aoqi@0: int block = 4096; aoqi@0: int linelen = 0; aoqi@0: boolean longLine = false, badEOL = false; aoqi@0: boolean checkEOL = encodeEolStrict && breakOnNonAscii; aoqi@0: byte buf[] = null; aoqi@0: if (max != 0) { aoqi@0: block = (max == ALL) ? 4096 : Math.min(max, 4096); aoqi@0: buf = new byte[block]; aoqi@0: } aoqi@0: while (max != 0) { aoqi@0: try { aoqi@0: if ((len = is.read(buf, 0, block)) == -1) aoqi@0: break; aoqi@0: int lastb = 0; aoqi@0: for (int i = 0; i < len; i++) { aoqi@0: // The '&' operator automatically causes b[i] to aoqi@0: // be promoted to an int, and we mask out the higher aoqi@0: // bytes in the int so that the resulting value is aoqi@0: // not a negative integer. 
aoqi@0: int b = buf[i] & 0xff; aoqi@0: if (checkEOL && aoqi@0: ((lastb == '\r' && b != '\n') || aoqi@0: (lastb != '\r' && b == '\n'))) aoqi@0: badEOL = true; aoqi@0: if (b == '\r' || b == '\n') aoqi@0: linelen = 0; aoqi@0: else { aoqi@0: linelen++; aoqi@0: if (linelen > 998) // 1000 - CRLF aoqi@0: longLine = true; aoqi@0: } aoqi@0: if (nonascii(b)) { // non-ascii aoqi@0: if (breakOnNonAscii) // we are done aoqi@0: return MOSTLY_NONASCII; aoqi@0: else aoqi@0: non_ascii++; aoqi@0: } else aoqi@0: ascii++; aoqi@0: lastb = b; aoqi@0: } aoqi@0: } catch (IOException ioex) { aoqi@0: break; aoqi@0: } aoqi@0: if (max != ALL) aoqi@0: max -= len; aoqi@0: } aoqi@0: aoqi@0: if (max == 0 && breakOnNonAscii) aoqi@0: // We have been told to break on the first non-ascii character. aoqi@0: // We haven't got any non-ascii character yet, but then we aoqi@0: // have not checked all of the available bytes either. So we aoqi@0: // cannot say for sure that this input stream is ALL_ASCII, aoqi@0: // and hence we must play safe and return MOSTLY_NONASCII aoqi@0: aoqi@0: return MOSTLY_NONASCII; aoqi@0: aoqi@0: if (non_ascii == 0) { // no non-us-ascii characters so far aoqi@0: // If we're looking at non-text data, and we saw CR without LF aoqi@0: // or vice versa, consider this mostly non-ASCII so that it aoqi@0: // will be base64 encoded (since the quoted-printable encoder aoqi@0: // doesn't encode this case properly). 
aoqi@0: if (badEOL) aoqi@0: return MOSTLY_NONASCII; aoqi@0: // if we've seen a long line, we degrade to mostly ascii aoqi@0: else if (longLine) aoqi@0: return MOSTLY_ASCII; aoqi@0: else aoqi@0: return ALL_ASCII; aoqi@0: } aoqi@0: if (ascii > non_ascii) // mostly ascii aoqi@0: return MOSTLY_ASCII; aoqi@0: return MOSTLY_NONASCII; aoqi@0: } aoqi@0: aoqi@0: static final boolean nonascii(int b) { aoqi@0: return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t'); aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * An OutputStream that determines whether the data written to aoqi@0: * it is all ASCII, mostly ASCII, or mostly non-ASCII. aoqi@0: */ aoqi@0: class AsciiOutputStream extends OutputStream { aoqi@0: private boolean breakOnNonAscii; aoqi@0: private int ascii = 0, non_ascii = 0; aoqi@0: private int linelen = 0; aoqi@0: private boolean longLine = false; aoqi@0: private boolean badEOL = false; aoqi@0: private boolean checkEOL = false; aoqi@0: private int lastb = 0; aoqi@0: private int ret = 0; aoqi@0: aoqi@0: public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) { aoqi@0: this.breakOnNonAscii = breakOnNonAscii; aoqi@0: checkEOL = encodeEolStrict && breakOnNonAscii; aoqi@0: } aoqi@0: aoqi@0: public void write(int b) throws IOException { aoqi@0: check(b); aoqi@0: } aoqi@0: aoqi@0: public void write(byte b[]) throws IOException { aoqi@0: write(b, 0, b.length); aoqi@0: } aoqi@0: aoqi@0: public void write(byte b[], int off, int len) throws IOException { aoqi@0: len += off; aoqi@0: for (int i = off; i < len ; i++) aoqi@0: check(b[i]); aoqi@0: } aoqi@0: aoqi@0: private final void check(int b) throws IOException { aoqi@0: b &= 0xff; aoqi@0: if (checkEOL && aoqi@0: ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n'))) aoqi@0: badEOL = true; aoqi@0: if (b == '\r' || b == '\n') aoqi@0: linelen = 0; aoqi@0: else { aoqi@0: linelen++; aoqi@0: if (linelen > 998) // 1000 - CRLF aoqi@0: longLine = true; aoqi@0: } aoqi@0: if 
(MimeUtility.nonascii(b)) { // non-ascii aoqi@0: non_ascii++; aoqi@0: if (breakOnNonAscii) { // we are done aoqi@0: ret = MimeUtility.MOSTLY_NONASCII; aoqi@0: throw new EOFException(); aoqi@0: } aoqi@0: } else aoqi@0: ascii++; aoqi@0: lastb = b; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Return ASCII-ness of data stream. aoqi@0: */ aoqi@0: public int getAscii() { aoqi@0: if (ret != 0) aoqi@0: return ret; aoqi@0: // If we're looking at non-text data, and we saw CR without LF aoqi@0: // or vice versa, consider this mostly non-ASCII so that it aoqi@0: // will be base64 encoded (since the quoted-printable encoder aoqi@0: // doesn't encode this case properly). aoqi@0: if (badEOL) aoqi@0: return MimeUtility.MOSTLY_NONASCII; aoqi@0: else if (non_ascii == 0) { // no non-us-ascii characters so far aoqi@0: // if we've seen a long line, we degrade to mostly ascii aoqi@0: if (longLine) aoqi@0: return MimeUtility.MOSTLY_ASCII; aoqi@0: else aoqi@0: return MimeUtility.ALL_ASCII; aoqi@0: } aoqi@0: if (ascii > non_ascii) // mostly ascii aoqi@0: return MimeUtility.MOSTLY_ASCII; aoqi@0: return MimeUtility.MOSTLY_NONASCII; aoqi@0: } aoqi@0: }