jdk8-mips64-public/jaxws: src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/HeaderTokenizer.java@9c07ef4934dd

     1 /*

     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 /*

    27  * @(#)HeaderTokenizer.java   1.9 02/03/27

    28  */

    32 package com.sun.xml.internal.messaging.saaj.packaging.mime.internet;

    35 /**

    36  * This class tokenizes RFC822 and MIME headers into the basic

    37  * symbols specified by RFC822 and MIME. <p>

    38  *

    39  * This class handles folded headers (ie headers with embedded

    40  * CRLF SPACE sequences). The folds are removed in the returned

    41  * tokens.

    42  *

    43  * @version 1.9, 02/03/27

    44  * @author  John Mani

    45  */

    47 public class HeaderTokenizer {

    49     /**

    50      * The Token class represents tokens returned by the

    51      * HeaderTokenizer.

    52      */

    53     public static class Token {

    55         private int type;

    56         private String value;

    58         /**

    59          * Token type indicating an ATOM.

    60          */

    61         public static final int ATOM            = -1;

    63         /**

    64          * Token type indicating a quoted string. The value

    65          * field contains the string without the quotes.

    66          */

    67         public static final int QUOTEDSTRING    = -2;

    69         /**

    70          * Token type indicating a comment. The value field

    71          * contains the comment string without the comment

    72          * start and end symbols.

    73          */

    74         public static final int COMMENT         = -3;

    76         /**

    77          * Token type indicating end of input.

    78          */

    79         public static final int  EOF            = -4;

    81         /**

    82          * Constructor.

    83          * @param       type    Token type

    84          * @param       value   Token value

    85          */

    86         public Token(int type, String value) {

    87              this.type = type;

    88              this.value = value;

    89         }

    91         /**

    92          * Return the type of the token. If the token represents a

    93          * delimiter or a control character, the type is that character

    94          * itself, converted to an integer. Otherwise, it's value is

    95          * one of the following:

    96          * <ul>

    97          * <li><code>ATOM</code> A sequence of ASCII characters

    98          *      delimited by either SPACE, CTL, "(", <"> or the

    99          *      specified SPECIALS

   100          * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters

   101          *      within quotes

   102          * <li><code>COMMENT</code> A sequence of ASCII characters

   103          *      within "(" and ")".

   104          * <li><code>EOF</code> End of header

   105          * </ul>

   106          */

   107         public int getType() {

   108             return type;

   109         }

   111         /**

   112          * Returns the value of the token just read. When the current

   113          * token is a quoted string, this field contains the body of the

   114          * string, without the quotes. When the current token is a comment,

   115          * this field contains the body of the comment.

   116          *

   117          * @return      token value

   118          */

   119         public String getValue() {

   120             return value;

   121         }

   122     }

   124     private String string; // the string to be tokenized

   125     private boolean skipComments; // should comments be skipped ?

   126     private String delimiters; // delimiter string

   127     private int currentPos; // current parse position

   128     private int maxPos; // string length

   129     private int nextPos; // track start of next Token for next()

   130     private int peekPos; // track start of next Token for peek()

   132     /**

   133      * RFC822 specials

   134      */

   135     public final static String RFC822 = "()<>@,;:\\\"\t .[]";

   137     /**

   138      * MIME specials

   139      */

   140     public final static String MIME = "()<>@,;:\\\"\t []/?=";

   142     // The EOF Token

   143     private final static Token EOFToken = new Token(Token.EOF, null);

   145     /**

   146      * Constructor that takes a rfc822 style header.

   147      *

   148      * @param   header  The rfc822 header to be tokenized

   149      * @param   delimiters      Set of delimiter characters

   150      *                          to be used to delimit ATOMS. These

   151      *                          are usually <code>RFC822</code> or

   152      *                          <code>MIME</code>

   153      * @param   skipComments  If true, comments are skipped and

   154      *                          not returned as tokens

   155      */

   156     public HeaderTokenizer(String header, String delimiters,

   157                            boolean skipComments) {

   158         string = (header == null) ? "" : header; // paranoia ?!

   159         this.skipComments = skipComments;

   160         this.delimiters = delimiters;

   161         currentPos = nextPos = peekPos = 0;

   162         maxPos = string.length();

   163     }

   165     /**

   166      * Constructor. Comments are ignored and not returned as tokens

   167      *

   168      * @param   header  The header that is tokenized

   169      * @param   delimiters  The delimiters to be used

   170      */

   171     public HeaderTokenizer(String header, String delimiters) {

   172         this(header, delimiters, true);

   173     }

   175     /**

   176      * Constructor. The RFC822 defined delimiters - RFC822 - are

   177      * used to delimit ATOMS. Also comments are skipped and not

   178      * returned as tokens

   179      */

   180     public HeaderTokenizer(String header)  {

   181         this(header, RFC822);

   182     }

   184     /**

   185      * Parses the next token from this String. <p>

   186      *

   187      * Clients sit in a loop calling next() to parse successive

   188      * tokens until an EOF Token is returned.

   189      *

   190      * @return          the next Token

   191      * @exception       ParseException if the parse fails

   192      */

   193     public Token next() throws ParseException {

   194         Token tk;

   196         currentPos = nextPos; // setup currentPos

   197         tk = getNext();

   198         nextPos = peekPos = currentPos; // update currentPos and peekPos

   199         return tk;

   200     }

   202     /**

   203      * Peek at the next token, without actually removing the token

   204      * from the parse stream. Invoking this method multiple times

   205      * will return successive tokens, until <code>next()</code> is

   206      * called. <p>

   207      *

   208      * @return          the next Token

   209      * @exception       ParseException if the parse fails

   210      */

   211     public Token peek() throws ParseException {

   212         Token tk;

   214         currentPos = peekPos; // setup currentPos

   215         tk = getNext();

   216         peekPos = currentPos; // update peekPos

   217         return tk;

   218     }

   220     /**

   221      * Return the rest of the Header.

   222      *

   223      * @return String   rest of header. null is returned if we are

   224      *                  already at end of header

   225      */

   226     public String getRemainder() {

   227         return string.substring(nextPos);

   228     }

   230     /*

   231      * Return the next token starting from 'currentPos'. After the

   232      * parse, 'currentPos' is updated to point to the start of the

   233      * next token.

   234      */

   235     private Token getNext() throws ParseException {

   236         // If we're already at end of string, return EOF

   237         if (currentPos >= maxPos)

   238             return EOFToken;

   240         // Skip white-space, position currentPos beyond the space

   241         if (skipWhiteSpace() == Token.EOF)

   242             return EOFToken;

   244         char c;

   245         int start;

   246         boolean filter = false;

   248         c = string.charAt(currentPos);

   250         // Check or Skip comments and position currentPos

   251         // beyond the comment

   252         while (c == '(') {

   253             // Parsing comment ..

   254             int nesting;

   255             for (start = ++currentPos, nesting = 1;

   256                  nesting > 0 && currentPos < maxPos;

   257                  currentPos++) {

   258                 c = string.charAt(currentPos);

   259                 if (c == '\\') {  // Escape sequence

   260                     currentPos++; // skip the escaped character

   261                     filter = true;

   262                 } else if (c == '\r')

   263                     filter = true;

   264                 else if (c == '(')

   265                     nesting++;

   266                 else if (c == ')')

   267                     nesting--;

   268             }

   269             if (nesting != 0)

   270                 throw new ParseException("Unbalanced comments");

   272             if (!skipComments) {

   273                 // Return the comment, if we are asked to.

   274                 // Note that the comment start & end markers are ignored.

   275                 String s;

   276                 if (filter) // need to go thru the token again.

   277                     s = filterToken(string, start, currentPos-1);

   278                 else

   279                     s = string.substring(start,currentPos-1);

   281                 return new Token(Token.COMMENT, s);

   282             }

   284             // Skip any whitespace after the comment.

   285             if (skipWhiteSpace() == Token.EOF)

   286                 return EOFToken;

   287             c = string.charAt(currentPos);

   288         }

   290         // Check for quoted-string and position currentPos

   291         //  beyond the terminating quote

   292         if (c == '"') {

   293             for (start = ++currentPos; currentPos < maxPos; currentPos++) {

   294                 c = string.charAt(currentPos);

   295                 if (c == '\\') { // Escape sequence

   296                     currentPos++;

   297                     filter = true;

   298                 } else if (c == '\r')

   299                     filter = true;

   300                 else if (c == '"') {

   301                     currentPos++;

   302                     String s;

   304                     if (filter)

   305                         s = filterToken(string, start, currentPos-1);

   306                     else

   307                         s = string.substring(start,currentPos-1);

   309                     return new Token(Token.QUOTEDSTRING, s);

   310                 }

   311             }

   312             throw new ParseException("Unbalanced quoted string");

   313         }

   315         // Check for SPECIAL or CTL

   316         if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {

   317             currentPos++; // re-position currentPos

   318             char ch[] = new char[1];

   319             ch[0] = c;

   320             return new Token((int)c, new String(ch));

   321         }

   323         // Check for ATOM

   324         for (start = currentPos; currentPos < maxPos; currentPos++) {

   325             c = string.charAt(currentPos);

   326             // ATOM is delimited by either SPACE, CTL, "(", <">

   327             // or the specified SPECIALS

   328             if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||

   329                 c == '"' || delimiters.indexOf(c) >= 0)

   330                 break;

   331         }

   332         return new Token(Token.ATOM, string.substring(start, currentPos));

   333     }

   335     // Skip SPACE, HT, CR and NL

   336     private int skipWhiteSpace() {

   337         char c;

   338         for (; currentPos < maxPos; currentPos++)

   339             if (((c = string.charAt(currentPos)) != ' ') &&

   340                 (c != '\t') && (c != '\r') && (c != '\n'))

   341                 return currentPos;

   342         return Token.EOF;

   343     }

   345     /* Process escape sequences and embedded LWSPs from a comment or

   346      * quoted string.

   347      */

   348     private static String filterToken(String s, int start, int end) {

   349         StringBuffer sb = new StringBuffer();

   350         char c;

   351         boolean gotEscape = false;

   352         boolean gotCR = false;

   354         for (int i = start; i < end; i++) {

   355             c = s.charAt(i);

   356             if (c == '\n' && gotCR) {

   357                 // This LF is part of an unescaped

   358                 // CRLF sequence (i.e, LWSP). Skip it.

   359                 gotCR = false;

   360                 continue;

   361             }

   363             gotCR = false;

   364             if (!gotEscape) {

   365                 // Previous character was NOT '\'

   366                 if (c == '\\') // skip this character

   367                     gotEscape = true;

   368                 else if (c == '\r') // skip this character

   369                     gotCR = true;

   370                 else // append this character

   371                     sb.append(c);

   372             } else {

   373                 // Previous character was '\'. So no need to

   374                 // bother with any special processing, just

   375                 // append this character

   376                 sb.append(c);

   377                 gotEscape = false;

   378             }

   379         }

   380         return sb.toString();

   381     }

   382 }

Mercurial > jdk8-mips64-public > jaxws / file revision

src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/HeaderTokenizer.java@9c07ef4934dd

src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/HeaderTokenizer.java