aoqi@0: /* aoqi@0: * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. 
aoqi@0: */ aoqi@0: aoqi@0: /* aoqi@0: * @(#)MimeUtility.java 1.45 03/03/10 aoqi@0: */ aoqi@0: aoqi@0: aoqi@0: aoqi@0: package com.sun.xml.internal.messaging.saaj.packaging.mime.internet; aoqi@0: aoqi@0: import java.io.*; aoqi@0: import java.util.*; aoqi@0: aoqi@0: import javax.activation.DataHandler; aoqi@0: import javax.activation.DataSource; aoqi@0: aoqi@0: import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException; aoqi@0: import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*; aoqi@0: import com.sun.xml.internal.messaging.saaj.util.SAAJUtil; aoqi@0: aoqi@0: /** aoqi@0: * This is a utility class that provides various MIME related aoqi@0: * functionality.
aoqi@0: * aoqi@0: * There are a set of methods to encode and decode MIME headers as aoqi@0: * per RFC 2047. A brief description on handling such headers is aoqi@0: * given below:
aoqi@0: * aoqi@0: * RFC 822 mail headers must contain only US-ASCII aoqi@0: * characters. Headers that contain non US-ASCII characters must be aoqi@0: * encoded so that they contain only US-ASCII characters. Basically, aoqi@0: * this process involves using either BASE64 or QP to encode certain aoqi@0: * characters. RFC 2047 describes this in detail.
aoqi@0: * aoqi@0: * In Java, Strings contain (16 bit) Unicode characters. ASCII is a aoqi@0: * subset of Unicode (and occupies the range 0 - 127). A String aoqi@0: * that contains only ASCII characters is already mail-safe. If the aoqi@0: * String contains non US-ASCII characters, it must be encoded. An aoqi@0: * additional complexity in this step is that since Unicode is not aoqi@0: * yet a widely used charset, one might want to first charset-encode aoqi@0: * the String into another charset and then do the transfer-encoding. aoqi@0: *
aoqi@0: * Note that to get the actual bytes of a mail-safe String (say, aoqi@0: * for sending over SMTP), one must do aoqi@0: *
aoqi@0: * aoqi@0: * byte[] bytes = string.getBytes("iso-8859-1"); aoqi@0: * aoqi@0: *
aoqi@0: *
aoqi@0: * The setHeader
and addHeader
methods
aoqi@0: * on MimeMessage and MimeBodyPart assume that the given header values
aoqi@0: * are Unicode strings that contain only US-ASCII characters. Hence
aoqi@0: * the callers of those methods must ensure that the values they pass
aoqi@0: * do not contain non US-ASCII characters. The methods in this class
aoqi@0: * help do this.
aoqi@0: *
aoqi@0: * The getHeader
family of methods on MimeMessage and
aoqi@0: * MimeBodyPart return the raw header value. These might be encoded
aoqi@0: * as per RFC 2047, and if so, must be decoded into Unicode Strings.
aoqi@0: * The methods in this class help to do this.
aoqi@0: * aoqi@0: * Several System properties control strict conformance to the MIME aoqi@0: * spec. Note that these are not session properties but must be set aoqi@0: * globally as System properties.
aoqi@0: *
aoqi@0: * The mail.mime.decodetext.strict
property controls
aoqi@0: * decoding of MIME encoded words. The MIME spec requires that encoded
aoqi@0: * words start at the beginning of a whitespace separated word. Some
aoqi@0: * mailers incorrectly include encoded words in the middle of a word.
aoqi@0: * If the mail.mime.decodetext.strict
System property is
aoqi@0: * set to "false"
, an attempt will be made to decode these
aoqi@0: * illegal encoded words. The default is true.
aoqi@0: *
aoqi@0: * The mail.mime.encodeeol.strict
property controls the
aoqi@0: * choice of Content-Transfer-Encoding for MIME parts that are not of
aoqi@0: * type "text". Often such parts will contain textual data for which
aoqi@0: * an encoding that allows normal end of line conventions is appropriate.
aoqi@0: * In rare cases, such a part will appear to contain entirely textual
aoqi@0: * data, but will require an encoding that preserves CR and LF characters
aoqi@0: * without change. If the mail.mime.encodeeol.strict
aoqi@0: * System property is set to "true"
, such an encoding will
aoqi@0: * be used when necessary. The default is false.
aoqi@0: *
aoqi@0: * In addition, the mail.mime.charset
System property can
aoqi@0: * be used to specify the default MIME charset to use for encoded words
aoqi@0: * and text parts that don't otherwise specify a charset. Normally, the
aoqi@0: * default MIME charset is derived from the default Java charset, as
aoqi@0: * specified in the file.encoding
System property. Most
aoqi@0: * applications will have no need to explicitly set the default MIME
aoqi@0: * charset. In cases where the default MIME charset to be used for
aoqi@0: * mail messages is different than the charset used for files stored on
aoqi@0: * the system, this property should be set.
aoqi@0: *
aoqi@0: * @version 1.45, 03/03/10
aoqi@0: * @author John Mani
aoqi@0: * @author Bill Shannon
aoqi@0: */
aoqi@0:
aoqi@0: public class MimeUtility {
aoqi@0:
aoqi@0: // This class cannot be instantiated
aoqi@0: private MimeUtility() { }
aoqi@0:
aoqi@0: public static final int ALL = -1;
aoqi@0:
aoqi@0: private static final int BUFFER_SIZE = 1024;
aoqi@0: private static boolean decodeStrict = true;
aoqi@0: private static boolean encodeEolStrict = false;
aoqi@0: private static boolean foldEncodedWords = false;
aoqi@0: private static boolean foldText = true;
aoqi@0:
/*
 * Read the behaviour switches from System properties. A flag whose
 * default is true is only switched off by the value "false"; a flag
 * whose default is false is only switched on by the value "true"
 * (both compared case-insensitively).
 */
static {
    try {
        String value = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict");
        decodeStrict = !"false".equalsIgnoreCase(value);        // default: true

        value = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict");
        encodeEolStrict = "true".equalsIgnoreCase(value);       // default: false

        value = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords");
        foldEncodedWords = "true".equalsIgnoreCase(value);      // default: false

        value = SAAJUtil.getSystemProperty("mail.mime.foldtext");
        foldText = !"false".equalsIgnoreCase(value);            // default: true
    } catch (SecurityException ignored) {
        // Not allowed to read the property: flags not yet assigned keep
        // their defaults.
    }
}
aoqi@0:
aoqi@0:
aoqi@0: /**
aoqi@0: * Get the content-transfer-encoding that should be applied
aoqi@0: * to the input stream of this datasource, to make it mailsafe.
aoqi@0: *
aoqi@0: * The algorithm used here is:
aoqi@0: *
getEncoding(DataSource)
except that instead
aoqi@0: * of reading the data from an InputStream
it uses the
aoqi@0: * writeTo
method to examine the data. This is more
aoqi@0: * efficient in the common case of a DataHandler
aoqi@0: * created with an object and a MIME type (for example, a
aoqi@0: * "text/plain" String) because all the I/O is done in this
aoqi@0: * thread. In the case requiring an InputStream
the
aoqi@0: * DataHandler
uses a thread, a pair of pipe streams,
aoqi@0: * and the writeTo
method to produce the data.
aoqi@0: *
aoqi@0: * @since JavaMail 1.2
aoqi@0: */
aoqi@0: public static String getEncoding(DataHandler dh) {
aoqi@0: ContentType cType = null;
aoqi@0: String encoding = null;
aoqi@0:
aoqi@0: /*
aoqi@0: * Try to pick the most efficient means of determining the
aoqi@0: * encoding. If this DataHandler was created using a DataSource,
aoqi@0: * the getEncoding(DataSource) method is typically faster. If
aoqi@0: * the DataHandler was created with an object, this method is
aoqi@0: * much faster. To distinguish the two cases, we use a heuristic.
aoqi@0: * A DataHandler created with an object will always have a null name.
aoqi@0: * A DataHandler created with a DataSource will usually have a
aoqi@0: * non-null name.
aoqi@0: *
aoqi@0: * XXX - This is actually quite a disgusting hack, but it makes
aoqi@0: * a common case run over twice as fast.
aoqi@0: */
aoqi@0: if (dh.getName() != null)
aoqi@0: return getEncoding(dh.getDataSource());
aoqi@0:
aoqi@0: try {
aoqi@0: cType = new ContentType(dh.getContentType());
aoqi@0: } catch (Exception ex) {
aoqi@0: return "base64"; // what else ?!
aoqi@0: }
aoqi@0:
aoqi@0: if (cType.match("text/*")) {
aoqi@0: // Check all of the available bytes
aoqi@0: AsciiOutputStream aos = new AsciiOutputStream(false, false);
aoqi@0: try {
aoqi@0: dh.writeTo(aos);
aoqi@0: } catch (IOException ex) { } // ignore it
aoqi@0: switch (aos.getAscii()) {
aoqi@0: case ALL_ASCII:
aoqi@0: encoding = "7bit"; // all ascii
aoqi@0: break;
aoqi@0: case MOSTLY_ASCII:
aoqi@0: encoding = "quoted-printable"; // mostly ascii
aoqi@0: break;
aoqi@0: default:
aoqi@0: encoding = "base64"; // mostly binary
aoqi@0: break;
aoqi@0: }
aoqi@0: } else { // not "text"
aoqi@0: // Check all of available bytes, break out if we find
aoqi@0: // at least one non-US-ASCII character
aoqi@0: AsciiOutputStream aos =
aoqi@0: new AsciiOutputStream(true, encodeEolStrict);
aoqi@0: try {
aoqi@0: dh.writeTo(aos);
aoqi@0: } catch (IOException ex) { } // ignore it
aoqi@0: if (aos.getAscii() == ALL_ASCII) // all ascii
aoqi@0: encoding = "7bit";
aoqi@0: else // found atleast one non-ascii character, use b64
aoqi@0: encoding = "base64";
aoqi@0: }
aoqi@0:
aoqi@0: return encoding;
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Decode the given input stream. The Input stream returned is
aoqi@0: * the decoded input stream. All the encodings defined in RFC 2045
aoqi@0: * are supported here. They include "base64", "quoted-printable",
aoqi@0: * "7bit", "8bit", and "binary". In addition, "uuencode" is also
aoqi@0: * supported.
aoqi@0: *
aoqi@0: * @param is input stream
aoqi@0: * @param encoding the encoding of the stream.
aoqi@0: * @return decoded input stream.
aoqi@0: */
aoqi@0: public static InputStream decode(InputStream is, String encoding)
aoqi@0: throws MessagingException {
aoqi@0: if (encoding.equalsIgnoreCase("base64"))
aoqi@0: return new BASE64DecoderStream(is);
aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable"))
aoqi@0: return new QPDecoderStream(is);
aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uue"))
aoqi@0: return new UUDecoderStream(is);
aoqi@0: else if (encoding.equalsIgnoreCase("binary") ||
aoqi@0: encoding.equalsIgnoreCase("7bit") ||
aoqi@0: encoding.equalsIgnoreCase("8bit"))
aoqi@0: return is;
aoqi@0: else
aoqi@0: throw new MessagingException("Unknown encoding: " + encoding);
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Wrap an encoder around the given output stream.
aoqi@0: * All the encodings defined in RFC 2045 are supported here.
aoqi@0: * They include "base64", "quoted-printable", "7bit", "8bit" and
aoqi@0: * "binary". In addition, "uuencode" is also supported.
aoqi@0: *
aoqi@0: * @param os output stream
aoqi@0: * @param encoding the encoding of the stream.
aoqi@0: * @return output stream that applies the
aoqi@0: * specified encoding.
aoqi@0: */
aoqi@0: public static OutputStream encode(OutputStream os, String encoding)
aoqi@0: throws MessagingException {
aoqi@0: if (encoding == null)
aoqi@0: return os;
aoqi@0: else if (encoding.equalsIgnoreCase("base64"))
aoqi@0: return new BASE64EncoderStream(os);
aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable"))
aoqi@0: return new QPEncoderStream(os);
aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uue"))
aoqi@0: return new UUEncoderStream(os);
aoqi@0: else if (encoding.equalsIgnoreCase("binary") ||
aoqi@0: encoding.equalsIgnoreCase("7bit") ||
aoqi@0: encoding.equalsIgnoreCase("8bit"))
aoqi@0: return os;
aoqi@0: else
aoqi@0: throw new MessagingException("Unknown encoding: " +encoding);
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Wrap an encoder around the given output stream.
aoqi@0: * All the encodings defined in RFC 2045 are supported here.
aoqi@0: * They include "base64", "quoted-printable", "7bit", "8bit" and
aoqi@0: * "binary". In addition, "uuencode" is also supported.
aoqi@0: * The filename
parameter is used with the "uuencode"
aoqi@0: * encoding and is included in the encoded output.
aoqi@0: *
aoqi@0: * @param os output stream
aoqi@0: * @param encoding the encoding of the stream.
aoqi@0: * @param filename name for the file being encoded (only used
aoqi@0: * with uuencode)
aoqi@0: * @return output stream that applies the
aoqi@0: * specified encoding.
aoqi@0: * @since JavaMail 1.2
aoqi@0: */
aoqi@0: public static OutputStream encode(OutputStream os, String encoding,
aoqi@0: String filename)
aoqi@0: throws MessagingException {
aoqi@0: if (encoding == null)
aoqi@0: return os;
aoqi@0: else if (encoding.equalsIgnoreCase("base64"))
aoqi@0: return new BASE64EncoderStream(os);
aoqi@0: else if (encoding.equalsIgnoreCase("quoted-printable"))
aoqi@0: return new QPEncoderStream(os);
aoqi@0: else if (encoding.equalsIgnoreCase("uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uuencode") ||
aoqi@0: encoding.equalsIgnoreCase("x-uue"))
aoqi@0: return new UUEncoderStream(os, filename);
aoqi@0: else if (encoding.equalsIgnoreCase("binary") ||
aoqi@0: encoding.equalsIgnoreCase("7bit") ||
aoqi@0: encoding.equalsIgnoreCase("8bit"))
aoqi@0: return os;
aoqi@0: else
aoqi@0: throw new MessagingException("Unknown encoding: " +encoding);
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Encode a RFC 822 "text" token into mail-safe form as per
aoqi@0: * RFC 2047.
aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the platform's aoqi@0: * default charset, then transfer-encoded using either the B or aoqi@0: * Q encoding. The resulting bytes are then returned as a Unicode aoqi@0: * string containing only ASCII characters.
aoqi@0: * aoqi@0: * Note that this method should be used to encode only aoqi@0: * "unstructured" RFC 822 headers.
aoqi@0: * aoqi@0: * Example of usage: aoqi@0: *
aoqi@0: * aoqi@0: * MimeBodyPart part = ... aoqi@0: * String rawvalue = "FooBar Mailer, Japanese version 1.1" aoqi@0: * try { aoqi@0: * // If we know for sure that rawvalue contains only US-ASCII aoqi@0: * // characters, we can skip the encoding part aoqi@0: * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue)); aoqi@0: * } catch (UnsupportedEncodingException e) { aoqi@0: * // encoding failure aoqi@0: * } catch (MessagingException me) { aoqi@0: * // setHeader() failure aoqi@0: * } aoqi@0: * aoqi@0: *
aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeText(String text) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeText(text, null, null); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "text" token into mail-safe form as per aoqi@0: * RFC 2047.
aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the specified aoqi@0: * charset, then transfer-encoded using either the B or Q encoding. aoqi@0: * The resulting bytes are then returned as a Unicode string aoqi@0: * containing only ASCII characters.
aoqi@0: * aoqi@0: * Note that this method should be used to encode only aoqi@0: * "unstructured" RFC 822 headers. aoqi@0: * aoqi@0: * @param text the header value aoqi@0: * @param charset the charset. If this parameter is null, the aoqi@0: * platform's default chatset is used. aoqi@0: * @param encoding the encoding to be used. Currently supported aoqi@0: * values are "B" and "Q". If this parameter is null, then aoqi@0: * the "Q" encoding is used if most of characters to be aoqi@0: * encoded are in the ASCII charset, otherwise "B" encoding aoqi@0: * is used. aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: */ aoqi@0: public static String encodeText(String text, String charset, aoqi@0: String encoding) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(text, charset, encoding, false); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Decode "unstructured" headers, that is, headers that are defined aoqi@0: * as '*text' as per RFC 822.
aoqi@0: * aoqi@0: * The string is decoded using the algorithm specified in aoqi@0: * RFC 2047, Section 6.1.1. If the charset-conversion fails aoqi@0: * for any sequence, an UnsupportedEncodingException is thrown. aoqi@0: * If the String is not an RFC 2047 style encoded header, it is aoqi@0: * returned as-is
aoqi@0: * aoqi@0: * Example of usage: aoqi@0: *
aoqi@0: * aoqi@0: * MimeBodyPart part = ... aoqi@0: * String rawvalue = null; aoqi@0: * String value = null; aoqi@0: * try { aoqi@0: * if ((rawvalue = part.getHeader("X-mailer")[0]) != null) aoqi@0: * value = MimeUtility.decodeText(rawvalue); aoqi@0: * } catch (UnsupportedEncodingException e) { aoqi@0: * // Don't care aoqi@0: * value = rawvalue; aoqi@0: * } catch (MessagingException me) { } aoqi@0: * aoqi@0: * return value; aoqi@0: * aoqi@0: *
aoqi@0: * aoqi@0: * @param etext the possibly encoded value aoqi@0: * @exception UnsupportedEncodingException if the charset aoqi@0: * conversion failed. aoqi@0: */ aoqi@0: public static String decodeText(String etext) aoqi@0: throws UnsupportedEncodingException { aoqi@0: /* aoqi@0: * We look for sequences separated by "linear-white-space". aoqi@0: * (as per RFC 2047, Section 6.1.1) aoqi@0: * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL. aoqi@0: */ aoqi@0: String lwsp = " \t\n\r"; aoqi@0: StringTokenizer st; aoqi@0: aoqi@0: /* aoqi@0: * First, lets do a quick run thru the string and check aoqi@0: * whether the sequence "=?" exists at all. If none exists, aoqi@0: * we know there are no encoded-words in here and we can just aoqi@0: * return the string as-is, without suffering thru the later aoqi@0: * decoding logic. aoqi@0: * This handles the most common case of unencoded headers aoqi@0: * efficiently. aoqi@0: */ aoqi@0: if (etext.indexOf("=?") == -1) aoqi@0: return etext; aoqi@0: aoqi@0: // Encoded words found. Start decoding ... aoqi@0: aoqi@0: st = new StringTokenizer(etext, lwsp, true); aoqi@0: StringBuffer sb = new StringBuffer(); // decode buffer aoqi@0: StringBuffer wsb = new StringBuffer(); // white space buffer aoqi@0: boolean prevWasEncoded = false; aoqi@0: aoqi@0: while (st.hasMoreTokens()) { aoqi@0: char c; aoqi@0: String s = st.nextToken(); aoqi@0: // If whitespace, append it to the whitespace buffer aoqi@0: if (((c = s.charAt(0)) == ' ') || (c == '\t') || aoqi@0: (c == '\r') || (c == '\n')) aoqi@0: wsb.append(c); aoqi@0: else { aoqi@0: // Check if token is an 'encoded-word' .. aoqi@0: String word; aoqi@0: try { aoqi@0: word = decodeWord(s); aoqi@0: // Yes, this IS an 'encoded-word'. aoqi@0: if (!prevWasEncoded && wsb.length() > 0) { aoqi@0: // if the previous word was also encoded, we aoqi@0: // should ignore the collected whitespace. Else aoqi@0: // we include the whitespace as well. 
aoqi@0: sb.append(wsb); aoqi@0: } aoqi@0: prevWasEncoded = true; aoqi@0: } catch (ParseException pex) { aoqi@0: // This is NOT an 'encoded-word'. aoqi@0: word = s; aoqi@0: // possibly decode inner encoded words aoqi@0: if (!decodeStrict) aoqi@0: word = decodeInnerWords(word); aoqi@0: // include colleced whitespace .. aoqi@0: if (wsb.length() > 0) aoqi@0: sb.append(wsb); aoqi@0: prevWasEncoded = false; aoqi@0: } aoqi@0: sb.append(word); // append the actual word aoqi@0: wsb.setLength(0); // reset wsb for reuse aoqi@0: } aoqi@0: } aoqi@0: return sb.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "word" token into mail-safe form as per aoqi@0: * RFC 2047.
aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the platform's aoqi@0: * default charset, then transfer-encoded using either the B or aoqi@0: * Q encoding. The resulting bytes are then returned as a Unicode aoqi@0: * string containing only ASCII characters.
aoqi@0: * aoqi@0: * This method is meant to be used when creating RFC 822 "phrases". aoqi@0: * The InternetAddress class, for example, uses this to encode aoqi@0: * it's 'phrase' component. aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @return Array of Unicode strings containing only US-ASCII aoqi@0: * characters. aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeWord(String word) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(word, null, null); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Encode a RFC 822 "word" token into mail-safe form as per aoqi@0: * RFC 2047.
aoqi@0: * aoqi@0: * The given Unicode string is examined for non US-ASCII aoqi@0: * characters. If the string contains only US-ASCII characters, aoqi@0: * it is returned as-is. If the string contains non US-ASCII aoqi@0: * characters, it is first character-encoded using the specified aoqi@0: * charset, then transfer-encoded using either the B or Q encoding. aoqi@0: * The resulting bytes are then returned as a Unicode string aoqi@0: * containing only ASCII characters.
aoqi@0: * aoqi@0: * @param text unicode string aoqi@0: * @param charset the MIME charset aoqi@0: * @param encoding the encoding to be used. Currently supported aoqi@0: * values are "B" and "Q". If this parameter is null, then aoqi@0: * the "Q" encoding is used if most of characters to be aoqi@0: * encoded are in the ASCII charset, otherwise "B" encoding aoqi@0: * is used. aoqi@0: * @return Unicode string containing only US-ASCII characters aoqi@0: * @exception UnsupportedEncodingException if the encoding fails aoqi@0: */ aoqi@0: public static String encodeWord(String word, String charset, aoqi@0: String encoding) aoqi@0: throws UnsupportedEncodingException { aoqi@0: return encodeWord(word, charset, encoding, true); aoqi@0: } aoqi@0: aoqi@0: /* aoqi@0: * Encode the given string. The parameter 'encodingWord' should aoqi@0: * be true if a RFC 822 "word" token is being encoded and false if a aoqi@0: * RFC 822 "text" token is being encoded. This is because the aoqi@0: * "Q" encoding defined in RFC 2047 has more restrictions when aoqi@0: * encoding "word" tokens. (Sigh) aoqi@0: */ aoqi@0: private static String encodeWord(String string, String charset, aoqi@0: String encoding, boolean encodingWord) aoqi@0: throws UnsupportedEncodingException { aoqi@0: aoqi@0: // If 'string' contains only US-ASCII characters, just aoqi@0: // return it. aoqi@0: int ascii = checkAscii(string); aoqi@0: if (ascii == ALL_ASCII) aoqi@0: return string; aoqi@0: aoqi@0: // Else, apply the specified charset conversion. aoqi@0: String jcharset; aoqi@0: if (charset == null) { // use default charset aoqi@0: jcharset = getDefaultJavaCharset(); // the java charset aoqi@0: charset = getDefaultMIMECharset(); // the MIME equivalent aoqi@0: } else // MIME charset -> java charset aoqi@0: jcharset = javaCharset(charset); aoqi@0: aoqi@0: // If no transfer-encoding is specified, figure one out. 
aoqi@0: if (encoding == null) { aoqi@0: if (ascii != MOSTLY_NONASCII) aoqi@0: encoding = "Q"; aoqi@0: else aoqi@0: encoding = "B"; aoqi@0: } aoqi@0: aoqi@0: boolean b64; aoqi@0: if (encoding.equalsIgnoreCase("B")) aoqi@0: b64 = true; aoqi@0: else if (encoding.equalsIgnoreCase("Q")) aoqi@0: b64 = false; aoqi@0: else aoqi@0: throw new UnsupportedEncodingException( aoqi@0: "Unknown transfer encoding: " + encoding); aoqi@0: aoqi@0: StringBuffer outb = new StringBuffer(); // the output buffer aoqi@0: doEncode(string, b64, jcharset, aoqi@0: // As per RFC 2047, size of an encoded string should not aoqi@0: // exceed 75 bytes. aoqi@0: // 7 = size of "=?", '?', 'B'/'Q', '?', "?=" aoqi@0: 75 - 7 - charset.length(), // the available space aoqi@0: "=?" + charset + "?" + encoding + "?", // prefix aoqi@0: true, encodingWord, outb); aoqi@0: aoqi@0: return outb.toString(); aoqi@0: } aoqi@0: aoqi@0: private static void doEncode(String string, boolean b64, aoqi@0: String jcharset, int avail, String prefix, aoqi@0: boolean first, boolean encodingWord, StringBuffer buf) aoqi@0: throws UnsupportedEncodingException { aoqi@0: aoqi@0: // First find out what the length of the encoded version of aoqi@0: // 'string' would be. aoqi@0: byte[] bytes = string.getBytes(jcharset); aoqi@0: int len; aoqi@0: if (b64) // "B" encoding aoqi@0: len = BEncoderStream.encodedLength(bytes); aoqi@0: else // "Q" aoqi@0: len = QEncoderStream.encodedLength(bytes, encodingWord); aoqi@0: aoqi@0: int size; aoqi@0: if ((len > avail) && ((size = string.length()) > 1)) { aoqi@0: // If the length is greater than 'avail', split 'string' aoqi@0: // into two and recurse. aoqi@0: doEncode(string.substring(0, size/2), b64, jcharset, aoqi@0: avail, prefix, first, encodingWord, buf); aoqi@0: doEncode(string.substring(size/2, size), b64, jcharset, aoqi@0: avail, prefix, false, encodingWord, buf); aoqi@0: } else { aoqi@0: // length <= than 'avail'. 
Encode the given string aoqi@0: ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE); aoqi@0: OutputStream eos; // the encoder aoqi@0: if (b64) // "B" encoding aoqi@0: eos = new BEncoderStream(os); aoqi@0: else // "Q" encoding aoqi@0: eos = new QEncoderStream(os, encodingWord); aoqi@0: aoqi@0: try { // do the encoding aoqi@0: eos.write(bytes); aoqi@0: eos.close(); aoqi@0: } catch (IOException ioex) { } aoqi@0: aoqi@0: byte[] encodedBytes = os.toByteArray(); // the encoded stuff aoqi@0: // Now write out the encoded (all ASCII) bytes into our aoqi@0: // StringBuffer aoqi@0: if (!first) // not the first line of this sequence aoqi@0: if (foldEncodedWords) aoqi@0: buf.append("\r\n "); // start a continuation line aoqi@0: else aoqi@0: buf.append(" "); // line will be folded later aoqi@0: aoqi@0: buf.append(prefix); aoqi@0: for (int i = 0; i < encodedBytes.length; i++) aoqi@0: buf.append((char)encodedBytes[i]); aoqi@0: buf.append("?="); // terminate the current sequence aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * The string is parsed using the rules in RFC 2047 for parsing aoqi@0: * an "encoded-word". If the parse fails, a ParseException is aoqi@0: * thrown. Otherwise, it is transfer-decoded, and then aoqi@0: * charset-converted into Unicode. If the charset-conversion aoqi@0: * fails, an UnsupportedEncodingException is thrown.
aoqi@0: * aoqi@0: * @param eword the possibly encoded value aoqi@0: * @exception ParseException if the string is not an aoqi@0: * encoded-word as per RFC 2047. aoqi@0: * @exception UnsupportedEncodingException if the charset aoqi@0: * conversion failed. aoqi@0: */ aoqi@0: public static String decodeWord(String eword) aoqi@0: throws ParseException, UnsupportedEncodingException { aoqi@0: aoqi@0: if (!eword.startsWith("=?")) // not an encoded word aoqi@0: throw new ParseException(); aoqi@0: aoqi@0: // get charset aoqi@0: int start = 2; int pos; aoqi@0: if ((pos = eword.indexOf('?', start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String charset = javaCharset(eword.substring(start, pos)); aoqi@0: aoqi@0: // get encoding aoqi@0: start = pos+1; aoqi@0: if ((pos = eword.indexOf('?', start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String encoding = eword.substring(start, pos); aoqi@0: aoqi@0: // get encoded-sequence aoqi@0: start = pos+1; aoqi@0: if ((pos = eword.indexOf("?=", start)) == -1) aoqi@0: throw new ParseException(); aoqi@0: String word = eword.substring(start, pos); aoqi@0: aoqi@0: try { aoqi@0: // Extract the bytes from word aoqi@0: ByteArrayInputStream bis = aoqi@0: new ByteArrayInputStream(ASCIIUtility.getBytes(word)); aoqi@0: aoqi@0: // Get the appropriate decoder aoqi@0: InputStream is; aoqi@0: if (encoding.equalsIgnoreCase("B")) aoqi@0: is = new BASE64DecoderStream(bis); aoqi@0: else if (encoding.equalsIgnoreCase("Q")) aoqi@0: is = new QDecoderStream(bis); aoqi@0: else aoqi@0: throw new UnsupportedEncodingException( aoqi@0: "unknown encoding: " + encoding); aoqi@0: aoqi@0: // For b64 & q, size of decoded word <= size of word. So aoqi@0: // the decoded bytes must fit into the 'bytes' array. This aoqi@0: // is certainly more efficient than writing bytes into a aoqi@0: // ByteArrayOutputStream and then pulling out the byte[] aoqi@0: // from it. 
aoqi@0: int count = bis.available(); aoqi@0: byte[] bytes = new byte[count]; aoqi@0: // count is set to the actual number of decoded bytes aoqi@0: count = is.read(bytes, 0, count); aoqi@0: aoqi@0: // Finally, convert the decoded bytes into a String using aoqi@0: // the specified charset aoqi@0: String s = new String(bytes, 0, count, charset); aoqi@0: if (pos + 2 < eword.length()) { aoqi@0: // there's still more text in the string aoqi@0: String rest = eword.substring(pos + 2); aoqi@0: if (!decodeStrict) aoqi@0: rest = decodeInnerWords(rest); aoqi@0: s += rest; aoqi@0: } aoqi@0: return s; aoqi@0: } catch (UnsupportedEncodingException uex) { aoqi@0: // explicitly catch and rethrow this exception, otherwise aoqi@0: // the below IOException catch will swallow this up! aoqi@0: throw uex; aoqi@0: } catch (IOException ioex) { aoqi@0: // Shouldn't happen. aoqi@0: throw new ParseException(); aoqi@0: } catch (IllegalArgumentException iex) { aoqi@0: /* An unknown charset of the form ISO-XXX-XXX, will cause aoqi@0: * the JDK to throw an IllegalArgumentException ... Since the aoqi@0: * JDK will attempt to create a classname using this string, aoqi@0: * but valid classnames must not contain the character '-', aoqi@0: * and this results in an IllegalArgumentException, rather than aoqi@0: * the expected UnsupportedEncodingException. Yikes aoqi@0: */ aoqi@0: throw new UnsupportedEncodingException(); aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Look for encoded words within a word. The MIME spec doesn't aoqi@0: * allow this, but many broken mailers, especially Japanese mailers, aoqi@0: * produce such incorrect encodings. 
aoqi@0: */ aoqi@0: private static String decodeInnerWords(String word) aoqi@0: throws UnsupportedEncodingException { aoqi@0: int start = 0, i; aoqi@0: StringBuffer buf = new StringBuffer(); aoqi@0: while ((i = word.indexOf("=?", start)) >= 0) { aoqi@0: buf.append(word.substring(start, i)); aoqi@0: int end = word.indexOf("?=", i); aoqi@0: if (end < 0) aoqi@0: break; aoqi@0: String s = word.substring(i, end + 2); aoqi@0: try { aoqi@0: s = decodeWord(s); aoqi@0: } catch (ParseException pex) { aoqi@0: // ignore it, just use the original string aoqi@0: } aoqi@0: buf.append(s); aoqi@0: start = end + 2; aoqi@0: } aoqi@0: if (start == 0) aoqi@0: return word; aoqi@0: if (start < word.length()) aoqi@0: buf.append(word.substring(start)); aoqi@0: return buf.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * A utility method to quote a word, if the word contains any aoqi@0: * characters from the specified 'specials' list.
aoqi@0: *
aoqi@0: * The <code>HeaderTokenizer</code> class defines two special
aoqi@0: * sets of delimiters - MIME and RFC 822.
aoqi@0: *
aoqi@0: * This method is typically used during the generation of
aoqi@0: * RFC 822 and MIME header fields.
aoqi@0: *
aoqi@0: * @param word word to be quoted
aoqi@0: * @param specials the set of special characters
aoqi@0: * @return the possibly quoted word
aoqi@0: * @see javax.mail.internet.HeaderTokenizer#MIME
aoqi@0: * @see javax.mail.internet.HeaderTokenizer#RFC822
aoqi@0: */
aoqi@0: public static String quote(String word, String specials) {
aoqi@0: int len = word.length();
aoqi@0:
aoqi@0: /*
aoqi@0: * Look for any "bad" characters, Escape and
aoqi@0: * quote the entire string if necessary.
aoqi@0: */
aoqi@0: boolean needQuoting = false;
aoqi@0: for (int i = 0; i < len; i++) {
aoqi@0: char c = word.charAt(i);
aoqi@0: if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
aoqi@0: // need to escape them and then quote the whole string
aoqi@0: StringBuffer sb = new StringBuffer(len + 3);
aoqi@0: sb.append('"');
aoqi@0: sb.append(word.substring(0, i));
aoqi@0: int lastc = 0;
aoqi@0: for (int j = i; j < len; j++) {
aoqi@0: char cc = word.charAt(j);
aoqi@0: if ((cc == '"') || (cc == '\\') ||
aoqi@0: (cc == '\r') || (cc == '\n'))
aoqi@0: if (cc == '\n' && lastc == '\r')
aoqi@0: ; // do nothing, CR was already escaped
aoqi@0: else
aoqi@0: sb.append('\\'); // Escape the character
aoqi@0: sb.append(cc);
aoqi@0: lastc = cc;
aoqi@0: }
aoqi@0: sb.append('"');
aoqi@0: return sb.toString();
aoqi@0: } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
aoqi@0: // These characters cause the string to be quoted
aoqi@0: needQuoting = true;
aoqi@0: }
aoqi@0:
aoqi@0: if (needQuoting) {
aoqi@0: StringBuffer sb = new StringBuffer(len + 2);
aoqi@0: sb.append('"').append(word).append('"');
aoqi@0: return sb.toString();
aoqi@0: } else
aoqi@0: return word;
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Fold a string at linear whitespace so that each line is no longer
aoqi@0: * than 76 characters, if possible. If there are more than 76
aoqi@0: * non-whitespace characters consecutively, the string is folded at
aoqi@0: * the first whitespace after that sequence. The parameter
aoqi@0: * used
indicates how many characters have been used in
aoqi@0: * the current line; it is usually the length of the header name.
aoqi@0: * aoqi@0: * Note that line breaks in the string aren't escaped; they probably aoqi@0: * should be. aoqi@0: * aoqi@0: * @param used characters used in line so far aoqi@0: * @param s the string to fold aoqi@0: * @return the folded string aoqi@0: */ aoqi@0: /*public*/ static String fold(int used, String s) { aoqi@0: if (!foldText) aoqi@0: return s; aoqi@0: aoqi@0: int end; aoqi@0: char c; aoqi@0: // Strip trailing spaces aoqi@0: for (end = s.length() - 1; end >= 0; end--) { aoqi@0: c = s.charAt(end); aoqi@0: if (c != ' ' && c != '\t') aoqi@0: break; aoqi@0: } aoqi@0: if (end != s.length() - 1) aoqi@0: s = s.substring(0, end + 1); aoqi@0: aoqi@0: // if the string fits now, just return it aoqi@0: if (used + s.length() <= 76) aoqi@0: return s; aoqi@0: aoqi@0: // have to actually fold the string aoqi@0: StringBuffer sb = new StringBuffer(s.length() + 4); aoqi@0: char lastc = 0; aoqi@0: while (used + s.length() > 76) { aoqi@0: int lastspace = -1; aoqi@0: for (int i = 0; i < s.length(); i++) { aoqi@0: if (lastspace != -1 && used + i > 76) aoqi@0: break; aoqi@0: c = s.charAt(i); aoqi@0: if (c == ' ' || c == '\t') aoqi@0: if (!(lastc == ' ' || lastc == '\t')) aoqi@0: lastspace = i; aoqi@0: lastc = c; aoqi@0: } aoqi@0: if (lastspace == -1) { aoqi@0: // no space, use the whole thing aoqi@0: sb.append(s); aoqi@0: s = ""; aoqi@0: used = 0; aoqi@0: break; aoqi@0: } aoqi@0: sb.append(s.substring(0, lastspace)); aoqi@0: sb.append("\r\n"); aoqi@0: lastc = s.charAt(lastspace); aoqi@0: sb.append(lastc); aoqi@0: s = s.substring(lastspace + 1); aoqi@0: used = 1; aoqi@0: } aoqi@0: sb.append(s); aoqi@0: return sb.toString(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Unfold a folded header. Any line breaks that aren't escaped and aoqi@0: * are followed by whitespace are removed. 
aoqi@0: * aoqi@0: * @param s the string to unfold aoqi@0: * @return the unfolded string aoqi@0: */ aoqi@0: /*public*/ static String unfold(String s) { aoqi@0: if (!foldText) aoqi@0: return s; aoqi@0: aoqi@0: StringBuffer sb = null; aoqi@0: int i; aoqi@0: while ((i = indexOfAny(s, "\r\n")) >= 0) { aoqi@0: int start = i; aoqi@0: int l = s.length(); aoqi@0: i++; // skip CR or NL aoqi@0: if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n') aoqi@0: i++; // skip LF aoqi@0: if (start == 0 || s.charAt(start - 1) != '\\') { aoqi@0: char c; aoqi@0: // if next line starts with whitespace, skip all of it aoqi@0: // XXX - always has to be true? aoqi@0: if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) { aoqi@0: i++; // skip whitespace aoqi@0: while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) aoqi@0: i++; aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: if (start != 0) { aoqi@0: sb.append(s.substring(0, start)); aoqi@0: sb.append(' '); aoqi@0: } aoqi@0: s = s.substring(i); aoqi@0: continue; aoqi@0: } aoqi@0: // it's not a continuation line, just leave it in aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: sb.append(s.substring(0, i)); aoqi@0: s = s.substring(i); aoqi@0: } else { aoqi@0: // there's a backslash at "start - 1" aoqi@0: // strip it out, but leave in the line break aoqi@0: if (sb == null) aoqi@0: sb = new StringBuffer(s.length()); aoqi@0: sb.append(s.substring(0, start - 1)); aoqi@0: sb.append(s.substring(start, i)); aoqi@0: s = s.substring(i); aoqi@0: } aoqi@0: } aoqi@0: if (sb != null) { aoqi@0: sb.append(s); aoqi@0: return sb.toString(); aoqi@0: } else aoqi@0: return s; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Return the first index of any of the characters in "any" in "s", aoqi@0: * or -1 if none are found. aoqi@0: * aoqi@0: * This should be a method on String. 
aoqi@0: */ aoqi@0: private static int indexOfAny(String s, String any) { aoqi@0: return indexOfAny(s, any, 0); aoqi@0: } aoqi@0: aoqi@0: private static int indexOfAny(String s, String any, int start) { aoqi@0: try { aoqi@0: int len = s.length(); aoqi@0: for (int i = start; i < len; i++) { aoqi@0: if (any.indexOf(s.charAt(i)) >= 0) aoqi@0: return i; aoqi@0: } aoqi@0: return -1; aoqi@0: } catch (StringIndexOutOfBoundsException e) { aoqi@0: return -1; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Convert a MIME charset name into a valid Java charset name.
aoqi@0: * aoqi@0: * @param charset the MIME charset name aoqi@0: * @return the Java charset equivalent. If a suitable mapping is aoqi@0: * not available, the passed in charset is itself returned. aoqi@0: */ aoqi@0: public static String javaCharset(String charset) { aoqi@0: if (mime2java == null || charset == null) aoqi@0: // no mapping table, or charset parameter is null aoqi@0: return charset; aoqi@0: aoqi@0: String alias = (String)mime2java.get(charset.toLowerCase()); aoqi@0: return alias == null ? charset : alias; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Convert a java charset into its MIME charset name.
aoqi@0: *
aoqi@0: * Note that a future version of JDK (post 1.2) might provide
aoqi@0: * this functionality, in which case, we may deprecate this
aoqi@0: * method then.
aoqi@0: *
aoqi@0: * @param charset the JDK charset
aoqi@0: * @return the MIME/IANA equivalent. If a mapping
aoqi@0: * is not possible, the passed in charset itself
aoqi@0: * is returned.
aoqi@0: * @since JavaMail 1.1
aoqi@0: */
aoqi@0: public static String mimeCharset(String charset) {
aoqi@0: if (java2mime == null || charset == null)
aoqi@0: // no mapping table or charset param is null
aoqi@0: return charset;
aoqi@0:
aoqi@0: String alias = (String)java2mime.get(charset.toLowerCase());
aoqi@0: return alias == null ? charset : alias;
aoqi@0: }
aoqi@0:
aoqi@0: private static String defaultJavaCharset;
aoqi@0: private static String defaultMIMECharset;
aoqi@0:
aoqi@0: /**
aoqi@0: * Get the default charset corresponding to the system's current
aoqi@0: * default locale. If the System property mail.mime.charset
aoqi@0: * is set, a system charset corresponding to this MIME charset will be
aoqi@0: * returned.
aoqi@0: *
aoqi@0: * @return the default charset of the system's default locale,
aoqi@0: * as a Java charset. (NOT a MIME charset)
aoqi@0: * @since JavaMail 1.1
aoqi@0: */
aoqi@0: public static String getDefaultJavaCharset() {
aoqi@0: if (defaultJavaCharset == null) {
aoqi@0: /*
aoqi@0: * If mail.mime.charset is set, it controls the default
aoqi@0: * Java charset as well.
aoqi@0: */
aoqi@0: String mimecs = null;
aoqi@0:
aoqi@0: mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
aoqi@0:
aoqi@0: if (mimecs != null && mimecs.length() > 0) {
aoqi@0: defaultJavaCharset = javaCharset(mimecs);
aoqi@0: return defaultJavaCharset;
aoqi@0: }
aoqi@0:
aoqi@0: try {
aoqi@0: defaultJavaCharset = System.getProperty("file.encoding",
aoqi@0: "8859_1");
aoqi@0: } catch (SecurityException sex) {
aoqi@0:
aoqi@0: class NullInputStream extends InputStream {
aoqi@0: public int read() {
aoqi@0: return 0;
aoqi@0: }
aoqi@0: }
aoqi@0: InputStreamReader reader =
aoqi@0: new InputStreamReader(new NullInputStream());
aoqi@0: defaultJavaCharset = reader.getEncoding();
aoqi@0: if (defaultJavaCharset == null)
aoqi@0: defaultJavaCharset = "8859_1";
aoqi@0: }
aoqi@0: }
aoqi@0:
aoqi@0: return defaultJavaCharset;
aoqi@0: }
aoqi@0:
aoqi@0: /*
aoqi@0: * Get the default MIME charset for this locale.
aoqi@0: */
aoqi@0: static String getDefaultMIMECharset() {
aoqi@0: if (defaultMIMECharset == null) {
aoqi@0: defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
aoqi@0: }
aoqi@0: if (defaultMIMECharset == null)
aoqi@0: defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
aoqi@0: return defaultMIMECharset;
aoqi@0: }
aoqi@0:
aoqi@0: // Tables to map MIME charset names to Java names and vice versa.
aoqi@0: // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
aoqi@0: private static Hashtable mime2java;
aoqi@0: private static Hashtable java2mime;
aoqi@0:
aoqi@0: static {
aoqi@0: java2mime = new Hashtable(40);
aoqi@0: mime2java = new Hashtable(10);
aoqi@0:
aoqi@0: try {
aoqi@0: // Use this class's classloader to load the mapping file
aoqi@0: // XXX - we should use SecuritySupport, but it's in another package
aoqi@0: InputStream is =
aoqi@0: com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
aoqi@0: "/META-INF/javamail.charset.map");
aoqi@0:
aoqi@0: if (is != null) {
aoqi@0: is = new LineInputStream(is);
aoqi@0:
aoqi@0: // Load the JDK-to-MIME charset mapping table
aoqi@0: loadMappings((LineInputStream)is, java2mime);
aoqi@0:
aoqi@0: // Load the MIME-to-JDK charset mapping table
aoqi@0: loadMappings((LineInputStream)is, mime2java);
aoqi@0: }
aoqi@0: } catch (Exception ex) { }
aoqi@0:
aoqi@0: // If we didn't load the tables, e.g., because we didn't have
aoqi@0: // permission, load them manually. The entries here should be
aoqi@0: // the same as the default javamail.charset.map.
aoqi@0: if (java2mime.isEmpty()) {
aoqi@0: java2mime.put("8859_1", "ISO-8859-1");
aoqi@0: java2mime.put("iso8859_1", "ISO-8859-1");
aoqi@0: java2mime.put("ISO8859-1", "ISO-8859-1");
aoqi@0:
aoqi@0: java2mime.put("8859_2", "ISO-8859-2");
aoqi@0: java2mime.put("iso8859_2", "ISO-8859-2");
aoqi@0: java2mime.put("ISO8859-2", "ISO-8859-2");
aoqi@0:
aoqi@0: java2mime.put("8859_3", "ISO-8859-3");
aoqi@0: java2mime.put("iso8859_3", "ISO-8859-3");
aoqi@0: java2mime.put("ISO8859-3", "ISO-8859-3");
aoqi@0:
aoqi@0: java2mime.put("8859_4", "ISO-8859-4");
aoqi@0: java2mime.put("iso8859_4", "ISO-8859-4");
aoqi@0: java2mime.put("ISO8859-4", "ISO-8859-4");
aoqi@0:
aoqi@0: java2mime.put("8859_5", "ISO-8859-5");
aoqi@0: java2mime.put("iso8859_5", "ISO-8859-5");
aoqi@0: java2mime.put("ISO8859-5", "ISO-8859-5");
aoqi@0:
aoqi@0: java2mime.put("8859_6", "ISO-8859-6");
aoqi@0: java2mime.put("iso8859_6", "ISO-8859-6");
aoqi@0: java2mime.put("ISO8859-6", "ISO-8859-6");
aoqi@0:
aoqi@0: java2mime.put("8859_7", "ISO-8859-7");
aoqi@0: java2mime.put("iso8859_7", "ISO-8859-7");
aoqi@0: java2mime.put("ISO8859-7", "ISO-8859-7");
aoqi@0:
aoqi@0: java2mime.put("8859_8", "ISO-8859-8");
aoqi@0: java2mime.put("iso8859_8", "ISO-8859-8");
aoqi@0: java2mime.put("ISO8859-8", "ISO-8859-8");
aoqi@0:
aoqi@0: java2mime.put("8859_9", "ISO-8859-9");
aoqi@0: java2mime.put("iso8859_9", "ISO-8859-9");
aoqi@0: java2mime.put("ISO8859-9", "ISO-8859-9");
aoqi@0:
aoqi@0: java2mime.put("SJIS", "Shift_JIS");
aoqi@0: java2mime.put("MS932", "Shift_JIS");
aoqi@0: java2mime.put("JIS", "ISO-2022-JP");
aoqi@0: java2mime.put("ISO2022JP", "ISO-2022-JP");
aoqi@0: java2mime.put("EUC_JP", "euc-jp");
aoqi@0: java2mime.put("KOI8_R", "koi8-r");
aoqi@0: java2mime.put("EUC_CN", "euc-cn");
aoqi@0: java2mime.put("EUC_TW", "euc-tw");
aoqi@0: java2mime.put("EUC_KR", "euc-kr");
aoqi@0: }
aoqi@0: if (mime2java.isEmpty()) {
aoqi@0: mime2java.put("iso-2022-cn", "ISO2022CN");
aoqi@0: mime2java.put("iso-2022-kr", "ISO2022KR");
aoqi@0: mime2java.put("utf-8", "UTF8");
aoqi@0: mime2java.put("utf8", "UTF8");
aoqi@0: mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
aoqi@0: mime2java.put("ja_jp.eucjp", "EUCJIS");
aoqi@0: mime2java.put("euc-kr", "KSC5601");
aoqi@0: mime2java.put("euckr", "KSC5601");
aoqi@0: mime2java.put("us-ascii", "ISO-8859-1");
aoqi@0: mime2java.put("x-us-ascii", "ISO-8859-1");
aoqi@0: }
aoqi@0: }
aoqi@0:
aoqi@0: private static void loadMappings(LineInputStream is, Hashtable table) {
aoqi@0: String currLine;
aoqi@0:
aoqi@0: while (true) {
aoqi@0: try {
aoqi@0: currLine = is.readLine();
aoqi@0: } catch (IOException ioex) {
aoqi@0: break; // error in reading, stop
aoqi@0: }
aoqi@0:
aoqi@0: if (currLine == null) // end of file, stop
aoqi@0: break;
aoqi@0: if (currLine.startsWith("--") && currLine.endsWith("--"))
aoqi@0: // end of this table
aoqi@0: break;
aoqi@0:
aoqi@0: // ignore empty lines and comments
aoqi@0: if (currLine.trim().length() == 0 || currLine.startsWith("#"))
aoqi@0: continue;
aoqi@0:
aoqi@0: // A valid entry is of the form max
bytes are checked. If max
is
aoqi@0: * set to ALL
, then all the bytes available in this
aoqi@0: * input stream are checked. If <code>breakOnNonAscii</code> is true,
aoqi@0: * the check terminates when the first non-US-ASCII character is
aoqi@0: * found and MOSTLY_NONASCII is returned. Otherwise, the check continues
aoqi@0: * until <code>max</code> bytes have been read or the end of the stream is reached.
aoqi@0: *
aoqi@0: * @param is the input stream
aoqi@0: * @param max maximum bytes to check for. The special value
aoqi@0: * ALL indicates that all the bytes in this input
aoqi@0: * stream must be checked.
aoqi@0: * @param breakOnNonAscii if <code>true</code>, then terminate
aoqi@0: * the check when the first non-US-ASCII character
aoqi@0: * is found.
aoqi@0: * @return ALL_ASCII if all characters in the string
aoqi@0: * belong to the US-ASCII charset. MOSTLY_ASCII
aoqi@0: * if more than half of the available characters
aoqi@0: * are US-ASCII characters. Else MOSTLY_NONASCII.
aoqi@0: */
aoqi@0: static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
aoqi@0: int ascii = 0, non_ascii = 0;
aoqi@0: int len;
aoqi@0: int block = 4096;
aoqi@0: int linelen = 0;
aoqi@0: boolean longLine = false, badEOL = false;
aoqi@0: boolean checkEOL = encodeEolStrict && breakOnNonAscii;
aoqi@0: byte buf[] = null;
aoqi@0: if (max != 0) {
aoqi@0: block = (max == ALL) ? 4096 : Math.min(max, 4096);
aoqi@0: buf = new byte[block];
aoqi@0: }
aoqi@0: while (max != 0) {
aoqi@0: try {
aoqi@0: if ((len = is.read(buf, 0, block)) == -1)
aoqi@0: break;
aoqi@0: int lastb = 0;
aoqi@0: for (int i = 0; i < len; i++) {
aoqi@0: // The '&' operator automatically causes b[i] to
aoqi@0: // be promoted to an int, and we mask out the higher
aoqi@0: // bytes in the int so that the resulting value is
aoqi@0: // not a negative integer.
aoqi@0: int b = buf[i] & 0xff;
aoqi@0: if (checkEOL &&
aoqi@0: ((lastb == '\r' && b != '\n') ||
aoqi@0: (lastb != '\r' && b == '\n')))
aoqi@0: badEOL = true;
aoqi@0: if (b == '\r' || b == '\n')
aoqi@0: linelen = 0;
aoqi@0: else {
aoqi@0: linelen++;
aoqi@0: if (linelen > 998) // 1000 - CRLF
aoqi@0: longLine = true;
aoqi@0: }
aoqi@0: if (nonascii(b)) { // non-ascii
aoqi@0: if (breakOnNonAscii) // we are done
aoqi@0: return MOSTLY_NONASCII;
aoqi@0: else
aoqi@0: non_ascii++;
aoqi@0: } else
aoqi@0: ascii++;
aoqi@0: lastb = b;
aoqi@0: }
aoqi@0: } catch (IOException ioex) {
aoqi@0: break;
aoqi@0: }
aoqi@0: if (max != ALL)
aoqi@0: max -= len;
aoqi@0: }
aoqi@0:
aoqi@0: if (max == 0 && breakOnNonAscii)
aoqi@0: // We have been told to break on the first non-ascii character.
aoqi@0: // We haven't got any non-ascii character yet, but then we
aoqi@0: // have not checked all of the available bytes either. So we
aoqi@0: // cannot say for sure that this input stream is ALL_ASCII,
aoqi@0: // and hence we must play safe and return MOSTLY_NONASCII
aoqi@0:
aoqi@0: return MOSTLY_NONASCII;
aoqi@0:
aoqi@0: if (non_ascii == 0) { // no non-us-ascii characters so far
aoqi@0: // If we're looking at non-text data, and we saw CR without LF
aoqi@0: // or vice versa, consider this mostly non-ASCII so that it
aoqi@0: // will be base64 encoded (since the quoted-printable encoder
aoqi@0: // doesn't encode this case properly).
aoqi@0: if (badEOL)
aoqi@0: return MOSTLY_NONASCII;
aoqi@0: // if we've seen a long line, we degrade to mostly ascii
aoqi@0: else if (longLine)
aoqi@0: return MOSTLY_ASCII;
aoqi@0: else
aoqi@0: return ALL_ASCII;
aoqi@0: }
aoqi@0: if (ascii > non_ascii) // mostly ascii
aoqi@0: return MOSTLY_ASCII;
aoqi@0: return MOSTLY_NONASCII;
aoqi@0: }
aoqi@0:
aoqi@0: static final boolean nonascii(int b) {
aoqi@0: return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
aoqi@0: }
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * An OutputStream that determines whether the data written to
aoqi@0: * it is all ASCII, mostly ASCII, or mostly non-ASCII.
aoqi@0: */
aoqi@0: class AsciiOutputStream extends OutputStream {
aoqi@0: private boolean breakOnNonAscii;
aoqi@0: private int ascii = 0, non_ascii = 0;
aoqi@0: private int linelen = 0;
aoqi@0: private boolean longLine = false;
aoqi@0: private boolean badEOL = false;
aoqi@0: private boolean checkEOL = false;
aoqi@0: private int lastb = 0;
aoqi@0: private int ret = 0;
aoqi@0:
aoqi@0: public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
aoqi@0: this.breakOnNonAscii = breakOnNonAscii;
aoqi@0: checkEOL = encodeEolStrict && breakOnNonAscii;
aoqi@0: }
aoqi@0:
aoqi@0: public void write(int b) throws IOException {
aoqi@0: check(b);
aoqi@0: }
aoqi@0:
aoqi@0: public void write(byte b[]) throws IOException {
aoqi@0: write(b, 0, b.length);
aoqi@0: }
aoqi@0:
aoqi@0: public void write(byte b[], int off, int len) throws IOException {
aoqi@0: len += off;
aoqi@0: for (int i = off; i < len ; i++)
aoqi@0: check(b[i]);
aoqi@0: }
aoqi@0:
aoqi@0: private final void check(int b) throws IOException {
aoqi@0: b &= 0xff;
aoqi@0: if (checkEOL &&
aoqi@0: ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
aoqi@0: badEOL = true;
aoqi@0: if (b == '\r' || b == '\n')
aoqi@0: linelen = 0;
aoqi@0: else {
aoqi@0: linelen++;
aoqi@0: if (linelen > 998) // 1000 - CRLF
aoqi@0: longLine = true;
aoqi@0: }
aoqi@0: if (MimeUtility.nonascii(b)) { // non-ascii
aoqi@0: non_ascii++;
aoqi@0: if (breakOnNonAscii) { // we are done
aoqi@0: ret = MimeUtility.MOSTLY_NONASCII;
aoqi@0: throw new EOFException();
aoqi@0: }
aoqi@0: } else
aoqi@0: ascii++;
aoqi@0: lastb = b;
aoqi@0: }
aoqi@0:
aoqi@0: /**
aoqi@0: * Return ASCII-ness of data stream.
aoqi@0: */
aoqi@0: public int getAscii() {
aoqi@0: if (ret != 0)
aoqi@0: return ret;
aoqi@0: // If we're looking at non-text data, and we saw CR without LF
aoqi@0: // or vice versa, consider this mostly non-ASCII so that it
aoqi@0: // will be base64 encoded (since the quoted-printable encoder
aoqi@0: // doesn't encode this case properly).
aoqi@0: if (badEOL)
aoqi@0: return MimeUtility.MOSTLY_NONASCII;
aoqi@0: else if (non_ascii == 0) { // no non-us-ascii characters so far
aoqi@0: // if we've seen a long line, we degrade to mostly ascii
aoqi@0: if (longLine)
aoqi@0: return MimeUtility.MOSTLY_ASCII;
aoqi@0: else
aoqi@0: return MimeUtility.ALL_ASCII;
aoqi@0: }
aoqi@0: if (ascii > non_ascii) // mostly ascii
aoqi@0: return MimeUtility.MOSTLY_ASCII;
aoqi@0: return MimeUtility.MOSTLY_NONASCII;
aoqi@0: }
aoqi@0: }