src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java

Wed, 27 Apr 2016 01:27:09 +0800

author
aoqi
date
Wed, 27 Apr 2016 01:27:09 +0800
changeset 0
373ffda63c9a
child 637
9c07ef4934dd
permissions
-rw-r--r--

Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17

     1 /*
     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.xml.internal.ws.encoding;
    28 import javax.xml.ws.WebServiceException;
    30 /**
    31  * This class tokenizes RFC822 and MIME headers into the basic
    32  * symbols specified by RFC822 and MIME. <p>
    33  *
    34  * This class handles folded headers (ie headers with embedded
    35  * CRLF SPACE sequences). The folds are removed in the returned
    36  * tokens.
    37  *
    38  * @version 1.9, 02/03/27
    39  * @author  John Mani
    40  */
    42 class HeaderTokenizer {
    44     /**
    45      * The Token class represents tokens returned by the
    46      * HeaderTokenizer.
    47      */
    48     static class Token {
    50         private int type;
    51         private String value;
    53         /**
    54          * Token type indicating an ATOM.
    55          */
    56         public static final int ATOM            = -1;
    58         /**
    59          * Token type indicating a quoted string. The value
    60          * field contains the string without the quotes.
    61          */
    62         public static final int QUOTEDSTRING    = -2;
    64         /**
    65          * Token type indicating a comment. The value field
    66          * contains the comment string without the comment
    67          * start and end symbols.
    68          */
    69         public static final int COMMENT         = -3;
    71         /**
    72          * Token type indicating end of input.
    73          */
    74         public static final int  EOF            = -4;
    76         /**
    77          * Constructor.
    78          * @param       type    Token type
    79          * @param       value   Token value
    80          */
    81         public Token(int type, String value) {
    82              this.type = type;
    83              this.value = value;
    84         }
    86         /**
    87          * Return the type of the token. If the token represents a
    88          * delimiter or a control character, the type is that character
    89          * itself, converted to an integer. Otherwise, it's value is
    90          * one of the following:
    91          * <ul>
    92          * <li><code>ATOM</code> A sequence of ASCII characters
    93          *      delimited by either SPACE, CTL, "(", <"> or the
    94          *      specified SPECIALS
    95          * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
    96          *      within quotes
    97          * <li><code>COMMENT</code> A sequence of ASCII characters
    98          *      within "(" and ")".
    99          * <li><code>EOF</code> End of header
   100          * </ul>
   101          */
   102         public int getType() {
   103             return type;
   104         }
   106         /**
   107          * Returns the value of the token just read. When the current
   108          * token is a quoted string, this field contains the body of the
   109          * string, without the quotes. When the current token is a comment,
   110          * this field contains the body of the comment.
   111          *
   112          * @return      token value
   113          */
   114         public String getValue() {
   115             return value;
   116         }
   117     }
   119     private String string; // the string to be tokenized
   120     private boolean skipComments; // should comments be skipped ?
   121     private String delimiters; // delimiter string
   122     private int currentPos; // current parse position
   123     private int maxPos; // string length
   124     private int nextPos; // track start of next Token for next()
   125     private int peekPos; // track start of next Token for peek()
   127     /**
   128      * RFC822 specials
   129      */
   130     private final static String RFC822 = "()<>@,;:\\\"\t .[]";
   132     /**
   133      * MIME specials
   134      */
   135     final static String MIME = "()<>@,;:\\\"\t []/?=";
   137     // The EOF Token
   138     private final static Token EOFToken = new Token(Token.EOF, null);
   140     /**
   141      * Constructor that takes a rfc822 style header.
   142      *
   143      * @param   header  The rfc822 header to be tokenized
   144      * @param   delimiters      Set of delimiter characters
   145      *                          to be used to delimit ATOMS. These
   146      *                          are usually <code>RFC822</code> or
   147      *                          <code>MIME</code>
   148      * @param   skipComments  If true, comments are skipped and
   149      *                          not returned as tokens
   150      */
   151     HeaderTokenizer(String header, String delimiters,
   152                            boolean skipComments) {
   153         string = (header == null) ? "" : header; // paranoia ?!
   154         this.skipComments = skipComments;
   155         this.delimiters = delimiters;
   156         currentPos = nextPos = peekPos = 0;
   157         maxPos = string.length();
   158     }
   160     /**
   161      * Constructor. Comments are ignored and not returned as tokens
   162      *
   163      * @param   header  The header that is tokenized
   164      * @param   delimiters  The delimiters to be used
   165      */
   166     HeaderTokenizer(String header, String delimiters) {
   167             this(header, delimiters, true);
   168     }
   170     /**
   171      * Constructor. The RFC822 defined delimiters - RFC822 - are
   172      * used to delimit ATOMS. Also comments are skipped and not
   173      * returned as tokens
   174      */
   175     HeaderTokenizer(String header)  {
   176             this(header, RFC822);
   177     }
   179     /**
   180      * Parses the next token from this String. <p>
   181      *
   182      * Clients sit in a loop calling next() to parse successive
   183      * tokens until an EOF Token is returned.
   184      *
   185      * @return          the next Token
   186      * @exception WebServiceException if the parse fails
   187      */
   188     Token next() throws WebServiceException {
   189         Token tk;
   191         currentPos = nextPos; // setup currentPos
   192         tk = getNext();
   193         nextPos = peekPos = currentPos; // update currentPos and peekPos
   194         return tk;
   195     }
   197     /**
   198      * Peek at the next token, without actually removing the token
   199      * from the parse stream. Invoking this method multiple times
   200      * will return successive tokens, until <code>next()</code> is
   201      * called. <p>
   202      *
   203      * @return          the next Token
   204      * @exception       WebServiceException if the parse fails
   205      */
   206     Token peek() throws WebServiceException {
   207         Token tk;
   209         currentPos = peekPos; // setup currentPos
   210         tk = getNext();
   211         peekPos = currentPos; // update peekPos
   212         return tk;
   213     }
   215     /**
   216      * Return the rest of the Header.
   217      *
   218      * @return String   rest of header. null is returned if we are
   219      *                  already at end of header
   220      */
   221     String getRemainder() {
   222             return string.substring(nextPos);
   223     }
   225     /*
   226      * Return the next token starting from 'currentPos'. After the
   227      * parse, 'currentPos' is updated to point to the start of the
   228      * next token.
   229      */
   230     private Token getNext() throws WebServiceException {
   231         // If we're already at end of string, return EOF
   232         if (currentPos >= maxPos)
   233             return EOFToken;
   235         // Skip white-space, position currentPos beyond the space
   236         if (skipWhiteSpace() == Token.EOF)
   237             return EOFToken;
   239         char c;
   240         int start;
   241         boolean filter = false;
   243         c = string.charAt(currentPos);
   245         // Check or Skip comments and position currentPos
   246         // beyond the comment
   247         while (c == '(') {
   248             // Parsing comment ..
   249             int nesting;
   250             for (start = ++currentPos, nesting = 1;
   251              nesting > 0 && currentPos < maxPos;
   252              currentPos++) {
   253             c = string.charAt(currentPos);
   254             if (c == '\\') {  // Escape sequence
   255                 currentPos++; // skip the escaped character
   256                 filter = true;
   257             } else if (c == '\r')
   258                 filter = true;
   259             else if (c == '(')
   260                 nesting++;
   261             else if (c == ')')
   262                 nesting--;
   263             }
   264             if (nesting != 0)
   265             throw new WebServiceException("Unbalanced comments");
   267             if (!skipComments) {
   268             // Return the comment, if we are asked to.
   269             // Note that the comment start & end markers are ignored.
   270             String s;
   271             if (filter) // need to go thru the token again.
   272                 s = filterToken(string, start, currentPos-1);
   273             else
   274                 s = string.substring(start,currentPos-1);
   276             return new Token(Token.COMMENT, s);
   277             }
   279             // Skip any whitespace after the comment.
   280             if (skipWhiteSpace() == Token.EOF)
   281             return EOFToken;
   282             c = string.charAt(currentPos);
   283         }
   285         // Check for quoted-string and position currentPos
   286         //  beyond the terminating quote
   287         if (c == '"') {
   288             for (start = ++currentPos; currentPos < maxPos; currentPos++) {
   289             c = string.charAt(currentPos);
   290             if (c == '\\') { // Escape sequence
   291                 currentPos++;
   292                 filter = true;
   293             } else if (c == '\r')
   294                 filter = true;
   295             else if (c == '"') {
   296                 currentPos++;
   297                 String s;
   299                 if (filter)
   300                 s = filterToken(string, start, currentPos-1);
   301                 else
   302                 s = string.substring(start,currentPos-1);
   304                 return new Token(Token.QUOTEDSTRING, s);
   305             }
   306             }
   307             throw new WebServiceException("Unbalanced quoted string");
   308         }
   310         // Check for SPECIAL or CTL
   311         if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
   312             currentPos++; // re-position currentPos
   313             char ch[] = new char[1];
   314             ch[0] = c;
   315             return new Token((int)c, new String(ch));
   316         }
   318         // Check for ATOM
   319         for (start = currentPos; currentPos < maxPos; currentPos++) {
   320             c = string.charAt(currentPos);
   321             // ATOM is delimited by either SPACE, CTL, "(", <">
   322             // or the specified SPECIALS
   323             if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
   324             c == '"' || delimiters.indexOf(c) >= 0)
   325             break;
   326         }
   327         return new Token(Token.ATOM, string.substring(start, currentPos));
   328         }
   330         // Skip SPACE, HT, CR and NL
   331         private int skipWhiteSpace() {
   332         char c;
   333         for (; currentPos < maxPos; currentPos++)
   334             if (((c = string.charAt(currentPos)) != ' ') &&
   335             (c != '\t') && (c != '\r') && (c != '\n'))
   336             return currentPos;
   337         return Token.EOF;
   338     }
   340     /* Process escape sequences and embedded LWSPs from a comment or
   341      * quoted string.
   342      */
   343     private static String filterToken(String s, int start, int end) {
   344         StringBuffer sb = new StringBuffer();
   345         char c;
   346         boolean gotEscape = false;
   347         boolean gotCR = false;
   349         for (int i = start; i < end; i++) {
   350             c = s.charAt(i);
   351             if (c == '\n' && gotCR) {
   352             // This LF is part of an unescaped
   353             // CRLF sequence (i.e, LWSP). Skip it.
   354             gotCR = false;
   355             continue;
   356             }
   358             gotCR = false;
   359             if (!gotEscape) {
   360             // Previous character was NOT '\'
   361             if (c == '\\') // skip this character
   362                 gotEscape = true;
   363             else if (c == '\r') // skip this character
   364                 gotCR = true;
   365             else // append this character
   366                 sb.append(c);
   367             } else {
   368             // Previous character was '\'. So no need to
   369             // bother with any special processing, just
   370             // append this character
   371             sb.append(c);
   372             gotEscape = false;
   373             }
   374         }
   375         return sb.toString();
   376     }
   377 }

mercurial