diff -r 000000000000 -r 373ffda63c9a src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java Wed Apr 27 01:27:09 2016 +0800 @@ -0,0 +1,377 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package com.sun.xml.internal.ws.encoding; + +import javax.xml.ws.WebServiceException; + +/** + * This class tokenizes RFC822 and MIME headers into the basic + * symbols specified by RFC822 and MIME.

+ * + * This class handles folded headers (ie headers with embedded + * CRLF SPACE sequences). The folds are removed in the returned + * tokens. + * + * @version 1.9, 02/03/27 + * @author John Mani + */ + +class HeaderTokenizer { + + /** + * The Token class represents tokens returned by the + * HeaderTokenizer. + */ + static class Token { + + private int type; + private String value; + + /** + * Token type indicating an ATOM. + */ + public static final int ATOM = -1; + + /** + * Token type indicating a quoted string. The value + * field contains the string without the quotes. + */ + public static final int QUOTEDSTRING = -2; + + /** + * Token type indicating a comment. The value field + * contains the comment string without the comment + * start and end symbols. + */ + public static final int COMMENT = -3; + + /** + * Token type indicating end of input. + */ + public static final int EOF = -4; + + /** + * Constructor. + * @param type Token type + * @param value Token value + */ + public Token(int type, String value) { + this.type = type; + this.value = value; + } + + /** + * Return the type of the token. If the token represents a + * delimiter or a control character, the type is that character + * itself, converted to an integer. Otherwise, it's value is + * one of the following: + *

+ */ + public int getType() { + return type; + } + + /** + * Returns the value of the token just read. When the current + * token is a quoted string, this field contains the body of the + * string, without the quotes. When the current token is a comment, + * this field contains the body of the comment. + * + * @return token value + */ + public String getValue() { + return value; + } + } + + private String string; // the string to be tokenized + private boolean skipComments; // should comments be skipped ? + private String delimiters; // delimiter string + private int currentPos; // current parse position + private int maxPos; // string length + private int nextPos; // track start of next Token for next() + private int peekPos; // track start of next Token for peek() + + /** + * RFC822 specials + */ + private final static String RFC822 = "()<>@,;:\\\"\t .[]"; + + /** + * MIME specials + */ + final static String MIME = "()<>@,;:\\\"\t []/?="; + + // The EOF Token + private final static Token EOFToken = new Token(Token.EOF, null); + + /** + * Constructor that takes a rfc822 style header. + * + * @param header The rfc822 header to be tokenized + * @param delimiters Set of delimiter characters + * to be used to delimit ATOMS. These + * are usually RFC822 or + * MIME + * @param skipComments If true, comments are skipped and + * not returned as tokens + */ + HeaderTokenizer(String header, String delimiters, + boolean skipComments) { + string = (header == null) ? "" : header; // paranoia ?! + this.skipComments = skipComments; + this.delimiters = delimiters; + currentPos = nextPos = peekPos = 0; + maxPos = string.length(); + } + + /** + * Constructor. Comments are ignored and not returned as tokens + * + * @param header The header that is tokenized + * @param delimiters The delimiters to be used + */ + HeaderTokenizer(String header, String delimiters) { + this(header, delimiters, true); + } + + /** + * Constructor. The RFC822 defined delimiters - RFC822 - are + * used to delimit ATOMS. Also comments are skipped and not + * returned as tokens + */ + HeaderTokenizer(String header) { + this(header, RFC822); + } + + /** + * Parses the next token from this String.

+ * + * Clients sit in a loop calling next() to parse successive + * tokens until an EOF Token is returned. + * + * @return the next Token + * @exception WebServiceException if the parse fails + */ + Token next() throws WebServiceException { + Token tk; + + currentPos = nextPos; // setup currentPos + tk = getNext(); + nextPos = peekPos = currentPos; // update currentPos and peekPos + return tk; + } + + /** + * Peek at the next token, without actually removing the token + * from the parse stream. Invoking this method multiple times + * will return successive tokens, until next() is + * called.

+ * + * @return the next Token + * @exception WebServiceException if the parse fails + */ + Token peek() throws WebServiceException { + Token tk; + + currentPos = peekPos; // setup currentPos + tk = getNext(); + peekPos = currentPos; // update peekPos + return tk; + } + + /** + * Return the rest of the Header. + * + * @return String rest of header. null is returned if we are + * already at end of header + */ + String getRemainder() { + return string.substring(nextPos); + } + + /* + * Return the next token starting from 'currentPos'. After the + * parse, 'currentPos' is updated to point to the start of the + * next token. + */ + private Token getNext() throws WebServiceException { + // If we're already at end of string, return EOF + if (currentPos >= maxPos) + return EOFToken; + + // Skip white-space, position currentPos beyond the space + if (skipWhiteSpace() == Token.EOF) + return EOFToken; + + char c; + int start; + boolean filter = false; + + c = string.charAt(currentPos); + + // Check or Skip comments and position currentPos + // beyond the comment + while (c == '(') { + // Parsing comment .. + int nesting; + for (start = ++currentPos, nesting = 1; + nesting > 0 && currentPos < maxPos; + currentPos++) { + c = string.charAt(currentPos); + if (c == '\\') { // Escape sequence + currentPos++; // skip the escaped character + filter = true; + } else if (c == '\r') + filter = true; + else if (c == '(') + nesting++; + else if (c == ')') + nesting--; + } + if (nesting != 0) + throw new WebServiceException("Unbalanced comments"); + + if (!skipComments) { + // Return the comment, if we are asked to. + // Note that the comment start & end markers are ignored. + String s; + if (filter) // need to go thru the token again. + s = filterToken(string, start, currentPos-1); + else + s = string.substring(start,currentPos-1); + + return new Token(Token.COMMENT, s); + } + + // Skip any whitespace after the comment. + if (skipWhiteSpace() == Token.EOF) + return EOFToken; + c = string.charAt(currentPos); + } + + // Check for quoted-string and position currentPos + // beyond the terminating quote + if (c == '"') { + for (start = ++currentPos; currentPos < maxPos; currentPos++) { + c = string.charAt(currentPos); + if (c == '\\') { // Escape sequence + currentPos++; + filter = true; + } else if (c == '\r') + filter = true; + else if (c == '"') { + currentPos++; + String s; + + if (filter) + s = filterToken(string, start, currentPos-1); + else + s = string.substring(start,currentPos-1); + + return new Token(Token.QUOTEDSTRING, s); + } + } + throw new WebServiceException("Unbalanced quoted string"); + } + + // Check for SPECIAL or CTL + if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { + currentPos++; // re-position currentPos + char ch[] = new char[1]; + ch[0] = c; + return new Token((int)c, new String(ch)); + } + + // Check for ATOM + for (start = currentPos; currentPos < maxPos; currentPos++) { + c = string.charAt(currentPos); + // ATOM is delimited by either SPACE, CTL, "(", <"> + // or the specified SPECIALS + if (c < 040 || c >= 0177 || c == '(' || c == ' ' || + c == '"' || delimiters.indexOf(c) >= 0) + break; + } + return new Token(Token.ATOM, string.substring(start, currentPos)); + } + + // Skip SPACE, HT, CR and NL + private int skipWhiteSpace() { + char c; + for (; currentPos < maxPos; currentPos++) + if (((c = string.charAt(currentPos)) != ' ') && + (c != '\t') && (c != '\r') && (c != '\n')) + return currentPos; + return Token.EOF; + } + + /* Process escape sequences and embedded LWSPs from a comment or + * quoted string. + */ + private static String filterToken(String s, int start, int end) { + StringBuffer sb = new StringBuffer(); + char c; + boolean gotEscape = false; + boolean gotCR = false; + + for (int i = start; i < end; i++) { + c = s.charAt(i); + if (c == '\n' && gotCR) { + // This LF is part of an unescaped + // CRLF sequence (i.e, LWSP). Skip it. + gotCR = false; + continue; + } + + gotCR = false; + if (!gotEscape) { + // Previous character was NOT '\' + if (c == '\\') // skip this character + gotEscape = true; + else if (c == '\r') // skip this character + gotCR = true; + else // append this character + sb.append(c); + } else { + // Previous character was '\'. So no need to + // bother with any special processing, just + // append this character + sb.append(c); + gotEscape = false; + } + } + return sb.toString(); + } +}