jdk8-mips64-public/jaxws: src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java@373ffda63c9a

Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17

     1 /*

     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.xml.internal.ws.encoding;

    28 import javax.xml.ws.WebServiceException;

    30 /**

    31  * This class tokenizes RFC822 and MIME headers into the basic

    32  * symbols specified by RFC822 and MIME. <p>

    33  *

    34  * This class handles folded headers (ie headers with embedded

    35  * CRLF SPACE sequences). The folds are removed in the returned

    36  * tokens.

    37  *

    38  * @version 1.9, 02/03/27

    39  * @author  John Mani

    40  */

    42 class HeaderTokenizer {

    44     /**

    45      * The Token class represents tokens returned by the

    46      * HeaderTokenizer.

    47      */

    48     static class Token {

    50         private int type;

    51         private String value;

    53         /**

    54          * Token type indicating an ATOM.

    55          */

    56         public static final int ATOM            = -1;

    58         /**

    59          * Token type indicating a quoted string. The value

    60          * field contains the string without the quotes.

    61          */

    62         public static final int QUOTEDSTRING    = -2;

    64         /**

    65          * Token type indicating a comment. The value field

    66          * contains the comment string without the comment

    67          * start and end symbols.

    68          */

    69         public static final int COMMENT         = -3;

    71         /**

    72          * Token type indicating end of input.

    73          */

    74         public static final int  EOF            = -4;

    76         /**

    77          * Constructor.

    78          * @param       type    Token type

    79          * @param       value   Token value

    80          */

    81         public Token(int type, String value) {

    82              this.type = type;

    83              this.value = value;

    84         }

    86         /**

    87          * Return the type of the token. If the token represents a

    88          * delimiter or a control character, the type is that character

    89          * itself, converted to an integer. Otherwise, it's value is

    90          * one of the following:

    91          * <ul>

    92          * <li><code>ATOM</code> A sequence of ASCII characters

    93          *      delimited by either SPACE, CTL, "(", <"> or the

    94          *      specified SPECIALS

    95          * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters

    96          *      within quotes

    97          * <li><code>COMMENT</code> A sequence of ASCII characters

    98          *      within "(" and ")".

    99          * <li><code>EOF</code> End of header

   100          * </ul>

   101          */

   102         public int getType() {

   103             return type;

   104         }

   106         /**

   107          * Returns the value of the token just read. When the current

   108          * token is a quoted string, this field contains the body of the

   109          * string, without the quotes. When the current token is a comment,

   110          * this field contains the body of the comment.

   111          *

   112          * @return      token value

   113          */

   114         public String getValue() {

   115             return value;

   116         }

   117     }

   119     private String string; // the string to be tokenized

   120     private boolean skipComments; // should comments be skipped ?

   121     private String delimiters; // delimiter string

   122     private int currentPos; // current parse position

   123     private int maxPos; // string length

   124     private int nextPos; // track start of next Token for next()

   125     private int peekPos; // track start of next Token for peek()

   127     /**

   128      * RFC822 specials

   129      */

   130     private final static String RFC822 = "()<>@,;:\\\"\t .[]";

   132     /**

   133      * MIME specials

   134      */

   135     final static String MIME = "()<>@,;:\\\"\t []/?=";

   137     // The EOF Token

   138     private final static Token EOFToken = new Token(Token.EOF, null);

   140     /**

   141      * Constructor that takes a rfc822 style header.

   142      *

   143      * @param   header  The rfc822 header to be tokenized

   144      * @param   delimiters      Set of delimiter characters

   145      *                          to be used to delimit ATOMS. These

   146      *                          are usually <code>RFC822</code> or

   147      *                          <code>MIME</code>

   148      * @param   skipComments  If true, comments are skipped and

   149      *                          not returned as tokens

   150      */

   151     HeaderTokenizer(String header, String delimiters,

   152                            boolean skipComments) {

   153         string = (header == null) ? "" : header; // paranoia ?!

   154         this.skipComments = skipComments;

   155         this.delimiters = delimiters;

   156         currentPos = nextPos = peekPos = 0;

   157         maxPos = string.length();

   158     }

   160     /**

   161      * Constructor. Comments are ignored and not returned as tokens

   162      *

   163      * @param   header  The header that is tokenized

   164      * @param   delimiters  The delimiters to be used

   165      */

   166     HeaderTokenizer(String header, String delimiters) {

   167             this(header, delimiters, true);

   168     }

   170     /**

   171      * Constructor. The RFC822 defined delimiters - RFC822 - are

   172      * used to delimit ATOMS. Also comments are skipped and not

   173      * returned as tokens

   174      */

   175     HeaderTokenizer(String header)  {

   176             this(header, RFC822);

   177     }

   179     /**

   180      * Parses the next token from this String. <p>

   181      *

   182      * Clients sit in a loop calling next() to parse successive

   183      * tokens until an EOF Token is returned.

   184      *

   185      * @return          the next Token

   186      * @exception WebServiceException if the parse fails

   187      */

   188     Token next() throws WebServiceException {

   189         Token tk;

   191         currentPos = nextPos; // setup currentPos

   192         tk = getNext();

   193         nextPos = peekPos = currentPos; // update currentPos and peekPos

   194         return tk;

   195     }

   197     /**

   198      * Peek at the next token, without actually removing the token

   199      * from the parse stream. Invoking this method multiple times

   200      * will return successive tokens, until <code>next()</code> is

   201      * called. <p>

   202      *

   203      * @return          the next Token

   204      * @exception       WebServiceException if the parse fails

   205      */

   206     Token peek() throws WebServiceException {

   207         Token tk;

   209         currentPos = peekPos; // setup currentPos

   210         tk = getNext();

   211         peekPos = currentPos; // update peekPos

   212         return tk;

   213     }

   215     /**

   216      * Return the rest of the Header.

   217      *

   218      * @return String   rest of header. null is returned if we are

   219      *                  already at end of header

   220      */

   221     String getRemainder() {

   222             return string.substring(nextPos);

   223     }

   225     /*

   226      * Return the next token starting from 'currentPos'. After the

   227      * parse, 'currentPos' is updated to point to the start of the

   228      * next token.

   229      */

   230     private Token getNext() throws WebServiceException {

   231         // If we're already at end of string, return EOF

   232         if (currentPos >= maxPos)

   233             return EOFToken;

   235         // Skip white-space, position currentPos beyond the space

   236         if (skipWhiteSpace() == Token.EOF)

   237             return EOFToken;

   239         char c;

   240         int start;

   241         boolean filter = false;

   243         c = string.charAt(currentPos);

   245         // Check or Skip comments and position currentPos

   246         // beyond the comment

   247         while (c == '(') {

   248             // Parsing comment ..

   249             int nesting;

   250             for (start = ++currentPos, nesting = 1;

   251              nesting > 0 && currentPos < maxPos;

   252              currentPos++) {

   253             c = string.charAt(currentPos);

   254             if (c == '\\') {  // Escape sequence

   255                 currentPos++; // skip the escaped character

   256                 filter = true;

   257             } else if (c == '\r')

   258                 filter = true;

   259             else if (c == '(')

   260                 nesting++;

   261             else if (c == ')')

   262                 nesting--;

   263             }

   264             if (nesting != 0)

   265             throw new WebServiceException("Unbalanced comments");

   267             if (!skipComments) {

   268             // Return the comment, if we are asked to.

   269             // Note that the comment start & end markers are ignored.

   270             String s;

   271             if (filter) // need to go thru the token again.

   272                 s = filterToken(string, start, currentPos-1);

   273             else

   274                 s = string.substring(start,currentPos-1);

   276             return new Token(Token.COMMENT, s);

   277             }

   279             // Skip any whitespace after the comment.

   280             if (skipWhiteSpace() == Token.EOF)

   281             return EOFToken;

   282             c = string.charAt(currentPos);

   283         }

   285         // Check for quoted-string and position currentPos

   286         //  beyond the terminating quote

   287         if (c == '"') {

   288             for (start = ++currentPos; currentPos < maxPos; currentPos++) {

   289             c = string.charAt(currentPos);

   290             if (c == '\\') { // Escape sequence

   291                 currentPos++;

   292                 filter = true;

   293             } else if (c == '\r')

   294                 filter = true;

   295             else if (c == '"') {

   296                 currentPos++;

   297                 String s;

   299                 if (filter)

   300                 s = filterToken(string, start, currentPos-1);

   301                 else

   302                 s = string.substring(start,currentPos-1);

   304                 return new Token(Token.QUOTEDSTRING, s);

   305             }

   306             }

   307             throw new WebServiceException("Unbalanced quoted string");

   308         }

   310         // Check for SPECIAL or CTL

   311         if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {

   312             currentPos++; // re-position currentPos

   313             char ch[] = new char[1];

   314             ch[0] = c;

   315             return new Token((int)c, new String(ch));

   316         }

   318         // Check for ATOM

   319         for (start = currentPos; currentPos < maxPos; currentPos++) {

   320             c = string.charAt(currentPos);

   321             // ATOM is delimited by either SPACE, CTL, "(", <">

   322             // or the specified SPECIALS

   323             if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||

   324             c == '"' || delimiters.indexOf(c) >= 0)

   325             break;

   326         }

   327         return new Token(Token.ATOM, string.substring(start, currentPos));

   328         }

   330         // Skip SPACE, HT, CR and NL

   331         private int skipWhiteSpace() {

   332         char c;

   333         for (; currentPos < maxPos; currentPos++)

   334             if (((c = string.charAt(currentPos)) != ' ') &&

   335             (c != '\t') && (c != '\r') && (c != '\n'))

   336             return currentPos;

   337         return Token.EOF;

   338     }

   340     /* Process escape sequences and embedded LWSPs from a comment or

   341      * quoted string.

   342      */

   343     private static String filterToken(String s, int start, int end) {

   344         StringBuffer sb = new StringBuffer();

   345         char c;

   346         boolean gotEscape = false;

   347         boolean gotCR = false;

   349         for (int i = start; i < end; i++) {

   350             c = s.charAt(i);

   351             if (c == '\n' && gotCR) {

   352             // This LF is part of an unescaped

   353             // CRLF sequence (i.e, LWSP). Skip it.

   354             gotCR = false;

   355             continue;

   356             }

   358             gotCR = false;

   359             if (!gotEscape) {

   360             // Previous character was NOT '\'

   361             if (c == '\\') // skip this character

   362                 gotEscape = true;

   363             else if (c == '\r') // skip this character

   364                 gotCR = true;

   365             else // append this character

   366                 sb.append(c);

   367             } else {

   368             // Previous character was '\'. So no need to

   369             // bother with any special processing, just

   370             // append this character

   371             sb.append(c);

   372             gotEscape = false;

   373             }

   374         }

   375         return sb.toString();

   376     }

   377 }

Mercurial > jdk8-mips64-public > jaxws / file revision

src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java@373ffda63c9a

src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java