src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java	Wed Apr 27 01:27:09 2016 +0800
     1.3 @@ -0,0 +1,377 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package com.sun.xml.internal.ws.encoding;
    1.30 +
    1.31 +import javax.xml.ws.WebServiceException;
    1.32 +
    1.33 +/**
    1.34 + * This class tokenizes RFC822 and MIME headers into the basic
    1.35 + * symbols specified by RFC822 and MIME. <p>
    1.36 + *
    1.37 + * This class handles folded headers (ie headers with embedded
    1.38 + * CRLF SPACE sequences). The folds are removed in the returned
    1.39 + * tokens.
    1.40 + *
    1.41 + * @version 1.9, 02/03/27
    1.42 + * @author  John Mani
    1.43 + */
    1.44 +
    1.45 +class HeaderTokenizer {
    1.46 +
    1.47 +    /**
    1.48 +     * The Token class represents tokens returned by the
    1.49 +     * HeaderTokenizer.
    1.50 +     */
    1.51 +    static class Token {
    1.52 +
    1.53 +        private int type;
    1.54 +        private String value;
    1.55 +
    1.56 +        /**
    1.57 +         * Token type indicating an ATOM.
    1.58 +         */
    1.59 +        public static final int ATOM            = -1;
    1.60 +
    1.61 +        /**
    1.62 +         * Token type indicating a quoted string. The value
    1.63 +         * field contains the string without the quotes.
    1.64 +         */
    1.65 +        public static final int QUOTEDSTRING    = -2;
    1.66 +
    1.67 +        /**
    1.68 +         * Token type indicating a comment. The value field
    1.69 +         * contains the comment string without the comment
    1.70 +         * start and end symbols.
    1.71 +         */
    1.72 +        public static final int COMMENT         = -3;
    1.73 +
    1.74 +        /**
    1.75 +         * Token type indicating end of input.
    1.76 +         */
    1.77 +        public static final int  EOF            = -4;
    1.78 +
    1.79 +        /**
    1.80 +         * Constructor.
    1.81 +         * @param       type    Token type
    1.82 +         * @param       value   Token value
    1.83 +         */
    1.84 +        public Token(int type, String value) {
    1.85 +             this.type = type;
    1.86 +             this.value = value;
    1.87 +        }
    1.88 +
    1.89 +        /**
    1.90 +         * Return the type of the token. If the token represents a
    1.91 +         * delimiter or a control character, the type is that character
    1.92 +         * itself, converted to an integer. Otherwise, it's value is
    1.93 +         * one of the following:
    1.94 +         * <ul>
    1.95 +         * <li><code>ATOM</code> A sequence of ASCII characters
    1.96 +         *      delimited by either SPACE, CTL, "(", <"> or the
    1.97 +         *      specified SPECIALS
    1.98 +         * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
    1.99 +         *      within quotes
   1.100 +         * <li><code>COMMENT</code> A sequence of ASCII characters
   1.101 +         *      within "(" and ")".
   1.102 +         * <li><code>EOF</code> End of header
   1.103 +         * </ul>
   1.104 +         */
   1.105 +        public int getType() {
   1.106 +            return type;
   1.107 +        }
   1.108 +
   1.109 +        /**
   1.110 +         * Returns the value of the token just read. When the current
   1.111 +         * token is a quoted string, this field contains the body of the
   1.112 +         * string, without the quotes. When the current token is a comment,
   1.113 +         * this field contains the body of the comment.
   1.114 +         *
   1.115 +         * @return      token value
   1.116 +         */
   1.117 +        public String getValue() {
   1.118 +            return value;
   1.119 +        }
   1.120 +    }
   1.121 +
   1.122 +    private String string; // the string to be tokenized
   1.123 +    private boolean skipComments; // should comments be skipped ?
   1.124 +    private String delimiters; // delimiter string
   1.125 +    private int currentPos; // current parse position
   1.126 +    private int maxPos; // string length
   1.127 +    private int nextPos; // track start of next Token for next()
   1.128 +    private int peekPos; // track start of next Token for peek()
   1.129 +
   1.130 +    /**
   1.131 +     * RFC822 specials
   1.132 +     */
   1.133 +    private final static String RFC822 = "()<>@,;:\\\"\t .[]";
   1.134 +
   1.135 +    /**
   1.136 +     * MIME specials
   1.137 +     */
   1.138 +    final static String MIME = "()<>@,;:\\\"\t []/?=";
   1.139 +
   1.140 +    // The EOF Token
   1.141 +    private final static Token EOFToken = new Token(Token.EOF, null);
   1.142 +
   1.143 +    /**
   1.144 +     * Constructor that takes a rfc822 style header.
   1.145 +     *
   1.146 +     * @param   header  The rfc822 header to be tokenized
   1.147 +     * @param   delimiters      Set of delimiter characters
   1.148 +     *                          to be used to delimit ATOMS. These
   1.149 +     *                          are usually <code>RFC822</code> or
   1.150 +     *                          <code>MIME</code>
   1.151 +     * @param   skipComments  If true, comments are skipped and
   1.152 +     *                          not returned as tokens
   1.153 +     */
   1.154 +    HeaderTokenizer(String header, String delimiters,
   1.155 +                           boolean skipComments) {
   1.156 +        string = (header == null) ? "" : header; // paranoia ?!
   1.157 +        this.skipComments = skipComments;
   1.158 +        this.delimiters = delimiters;
   1.159 +        currentPos = nextPos = peekPos = 0;
   1.160 +        maxPos = string.length();
   1.161 +    }
   1.162 +
   1.163 +    /**
   1.164 +     * Constructor. Comments are ignored and not returned as tokens
   1.165 +     *
   1.166 +     * @param   header  The header that is tokenized
   1.167 +     * @param   delimiters  The delimiters to be used
   1.168 +     */
   1.169 +    HeaderTokenizer(String header, String delimiters) {
   1.170 +            this(header, delimiters, true);
   1.171 +    }
   1.172 +
   1.173 +    /**
   1.174 +     * Constructor. The RFC822 defined delimiters - RFC822 - are
   1.175 +     * used to delimit ATOMS. Also comments are skipped and not
   1.176 +     * returned as tokens
   1.177 +     */
   1.178 +    HeaderTokenizer(String header)  {
   1.179 +            this(header, RFC822);
   1.180 +    }
   1.181 +
   1.182 +    /**
   1.183 +     * Parses the next token from this String. <p>
   1.184 +     *
   1.185 +     * Clients sit in a loop calling next() to parse successive
   1.186 +     * tokens until an EOF Token is returned.
   1.187 +     *
   1.188 +     * @return          the next Token
   1.189 +     * @exception WebServiceException if the parse fails
   1.190 +     */
   1.191 +    Token next() throws WebServiceException {
   1.192 +        Token tk;
   1.193 +
   1.194 +        currentPos = nextPos; // setup currentPos
   1.195 +        tk = getNext();
   1.196 +        nextPos = peekPos = currentPos; // update currentPos and peekPos
   1.197 +        return tk;
   1.198 +    }
   1.199 +
   1.200 +    /**
   1.201 +     * Peek at the next token, without actually removing the token
   1.202 +     * from the parse stream. Invoking this method multiple times
   1.203 +     * will return successive tokens, until <code>next()</code> is
   1.204 +     * called. <p>
   1.205 +     *
   1.206 +     * @return          the next Token
   1.207 +     * @exception       WebServiceException if the parse fails
   1.208 +     */
   1.209 +    Token peek() throws WebServiceException {
   1.210 +        Token tk;
   1.211 +
   1.212 +        currentPos = peekPos; // setup currentPos
   1.213 +        tk = getNext();
   1.214 +        peekPos = currentPos; // update peekPos
   1.215 +        return tk;
   1.216 +    }
   1.217 +
   1.218 +    /**
   1.219 +     * Return the rest of the Header.
   1.220 +     *
   1.221 +     * @return String   rest of header. null is returned if we are
   1.222 +     *                  already at end of header
   1.223 +     */
   1.224 +    String getRemainder() {
   1.225 +            return string.substring(nextPos);
   1.226 +    }
   1.227 +
   1.228 +    /*
   1.229 +     * Return the next token starting from 'currentPos'. After the
   1.230 +     * parse, 'currentPos' is updated to point to the start of the
   1.231 +     * next token.
   1.232 +     */
   1.233 +    private Token getNext() throws WebServiceException {
   1.234 +        // If we're already at end of string, return EOF
   1.235 +        if (currentPos >= maxPos)
   1.236 +            return EOFToken;
   1.237 +
   1.238 +        // Skip white-space, position currentPos beyond the space
   1.239 +        if (skipWhiteSpace() == Token.EOF)
   1.240 +            return EOFToken;
   1.241 +
   1.242 +        char c;
   1.243 +        int start;
   1.244 +        boolean filter = false;
   1.245 +
   1.246 +        c = string.charAt(currentPos);
   1.247 +
   1.248 +        // Check or Skip comments and position currentPos
   1.249 +        // beyond the comment
   1.250 +        while (c == '(') {
   1.251 +            // Parsing comment ..
   1.252 +            int nesting;
   1.253 +            for (start = ++currentPos, nesting = 1;
   1.254 +             nesting > 0 && currentPos < maxPos;
   1.255 +             currentPos++) {
   1.256 +            c = string.charAt(currentPos);
   1.257 +            if (c == '\\') {  // Escape sequence
   1.258 +                currentPos++; // skip the escaped character
   1.259 +                filter = true;
   1.260 +            } else if (c == '\r')
   1.261 +                filter = true;
   1.262 +            else if (c == '(')
   1.263 +                nesting++;
   1.264 +            else if (c == ')')
   1.265 +                nesting--;
   1.266 +            }
   1.267 +            if (nesting != 0)
   1.268 +            throw new WebServiceException("Unbalanced comments");
   1.269 +
   1.270 +            if (!skipComments) {
   1.271 +            // Return the comment, if we are asked to.
   1.272 +            // Note that the comment start & end markers are ignored.
   1.273 +            String s;
   1.274 +            if (filter) // need to go thru the token again.
   1.275 +                s = filterToken(string, start, currentPos-1);
   1.276 +            else
   1.277 +                s = string.substring(start,currentPos-1);
   1.278 +
   1.279 +            return new Token(Token.COMMENT, s);
   1.280 +            }
   1.281 +
   1.282 +            // Skip any whitespace after the comment.
   1.283 +            if (skipWhiteSpace() == Token.EOF)
   1.284 +            return EOFToken;
   1.285 +            c = string.charAt(currentPos);
   1.286 +        }
   1.287 +
   1.288 +        // Check for quoted-string and position currentPos
   1.289 +        //  beyond the terminating quote
   1.290 +        if (c == '"') {
   1.291 +            for (start = ++currentPos; currentPos < maxPos; currentPos++) {
   1.292 +            c = string.charAt(currentPos);
   1.293 +            if (c == '\\') { // Escape sequence
   1.294 +                currentPos++;
   1.295 +                filter = true;
   1.296 +            } else if (c == '\r')
   1.297 +                filter = true;
   1.298 +            else if (c == '"') {
   1.299 +                currentPos++;
   1.300 +                String s;
   1.301 +
   1.302 +                if (filter)
   1.303 +                s = filterToken(string, start, currentPos-1);
   1.304 +                else
   1.305 +                s = string.substring(start,currentPos-1);
   1.306 +
   1.307 +                return new Token(Token.QUOTEDSTRING, s);
   1.308 +            }
   1.309 +            }
   1.310 +            throw new WebServiceException("Unbalanced quoted string");
   1.311 +        }
   1.312 +
   1.313 +        // Check for SPECIAL or CTL
   1.314 +        if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
   1.315 +            currentPos++; // re-position currentPos
   1.316 +            char ch[] = new char[1];
   1.317 +            ch[0] = c;
   1.318 +            return new Token((int)c, new String(ch));
   1.319 +        }
   1.320 +
   1.321 +        // Check for ATOM
   1.322 +        for (start = currentPos; currentPos < maxPos; currentPos++) {
   1.323 +            c = string.charAt(currentPos);
   1.324 +            // ATOM is delimited by either SPACE, CTL, "(", <">
   1.325 +            // or the specified SPECIALS
   1.326 +            if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
   1.327 +            c == '"' || delimiters.indexOf(c) >= 0)
   1.328 +            break;
   1.329 +        }
   1.330 +        return new Token(Token.ATOM, string.substring(start, currentPos));
   1.331 +        }
   1.332 +
   1.333 +        // Skip SPACE, HT, CR and NL
   1.334 +        private int skipWhiteSpace() {
   1.335 +        char c;
   1.336 +        for (; currentPos < maxPos; currentPos++)
   1.337 +            if (((c = string.charAt(currentPos)) != ' ') &&
   1.338 +            (c != '\t') && (c != '\r') && (c != '\n'))
   1.339 +            return currentPos;
   1.340 +        return Token.EOF;
   1.341 +    }
   1.342 +
   1.343 +    /* Process escape sequences and embedded LWSPs from a comment or
   1.344 +     * quoted string.
   1.345 +     */
   1.346 +    private static String filterToken(String s, int start, int end) {
   1.347 +        StringBuffer sb = new StringBuffer();
   1.348 +        char c;
   1.349 +        boolean gotEscape = false;
   1.350 +        boolean gotCR = false;
   1.351 +
   1.352 +        for (int i = start; i < end; i++) {
   1.353 +            c = s.charAt(i);
   1.354 +            if (c == '\n' && gotCR) {
   1.355 +            // This LF is part of an unescaped
   1.356 +            // CRLF sequence (i.e, LWSP). Skip it.
   1.357 +            gotCR = false;
   1.358 +            continue;
   1.359 +            }
   1.360 +
   1.361 +            gotCR = false;
   1.362 +            if (!gotEscape) {
   1.363 +            // Previous character was NOT '\'
   1.364 +            if (c == '\\') // skip this character
   1.365 +                gotEscape = true;
   1.366 +            else if (c == '\r') // skip this character
   1.367 +                gotCR = true;
   1.368 +            else // append this character
   1.369 +                sb.append(c);
   1.370 +            } else {
   1.371 +            // Previous character was '\'. So no need to
   1.372 +            // bother with any special processing, just
   1.373 +            // append this character
   1.374 +            sb.append(c);
   1.375 +            gotEscape = false;
   1.376 +            }
   1.377 +        }
   1.378 +        return sb.toString();
   1.379 +    }
   1.380 +}

mercurial