ohair@286: /* alanb@368: * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ohair@286: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ohair@286: * ohair@286: * This code is free software; you can redistribute it and/or modify it ohair@286: * under the terms of the GNU General Public License version 2 only, as ohair@286: * published by the Free Software Foundation. Oracle designates this ohair@286: * particular file as subject to the "Classpath" exception as provided ohair@286: * by Oracle in the LICENSE file that accompanied this code. ohair@286: * ohair@286: * This code is distributed in the hope that it will be useful, but WITHOUT ohair@286: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ohair@286: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ohair@286: * version 2 for more details (a copy is included in the LICENSE file that ohair@286: * accompanied this code). ohair@286: * ohair@286: * You should have received a copy of the GNU General Public License version ohair@286: * 2 along with this work; if not, write to the Free Software Foundation, ohair@286: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ohair@286: * ohair@286: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ohair@286: * or visit www.oracle.com if you need additional information or have any ohair@286: * questions. ohair@286: */ ohair@286: ohair@286: package com.sun.xml.internal.ws.encoding; ohair@286: ohair@286: import javax.xml.ws.WebServiceException; ohair@286: ohair@286: /** ohair@286: * This class tokenizes RFC822 and MIME headers into the basic ohair@286: * symbols specified by RFC822 and MIME.

ohair@286: * ohair@286: * This class handles folded headers (ie headers with embedded ohair@286: * CRLF SPACE sequences). The folds are removed in the returned ohair@286: * tokens. ohair@286: * ohair@286: * @version 1.9, 02/03/27 ohair@286: * @author John Mani ohair@286: */ ohair@286: ohair@286: class HeaderTokenizer { ohair@286: ohair@286: /** ohair@286: * The Token class represents tokens returned by the ohair@286: * HeaderTokenizer. ohair@286: */ ohair@286: static class Token { ohair@286: ohair@286: private int type; ohair@286: private String value; ohair@286: ohair@286: /** ohair@286: * Token type indicating an ATOM. ohair@286: */ ohair@286: public static final int ATOM = -1; ohair@286: ohair@286: /** ohair@286: * Token type indicating a quoted string. The value ohair@286: * field contains the string without the quotes. ohair@286: */ ohair@286: public static final int QUOTEDSTRING = -2; ohair@286: ohair@286: /** ohair@286: * Token type indicating a comment. The value field ohair@286: * contains the comment string without the comment ohair@286: * start and end symbols. ohair@286: */ ohair@286: public static final int COMMENT = -3; ohair@286: ohair@286: /** ohair@286: * Token type indicating end of input. ohair@286: */ ohair@286: public static final int EOF = -4; ohair@286: ohair@286: /** ohair@286: * Constructor. ohair@286: * @param type Token type ohair@286: * @param value Token value ohair@286: */ ohair@286: public Token(int type, String value) { ohair@286: this.type = type; ohair@286: this.value = value; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Return the type of the token. If the token represents a ohair@286: * delimiter or a control character, the type is that character ohair@286: * itself, converted to an integer. Otherwise, it's value is ohair@286: * one of the following: ohair@286: *

ohair@286: */ ohair@286: public int getType() { ohair@286: return type; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Returns the value of the token just read. When the current ohair@286: * token is a quoted string, this field contains the body of the ohair@286: * string, without the quotes. When the current token is a comment, ohair@286: * this field contains the body of the comment. ohair@286: * ohair@286: * @return token value ohair@286: */ ohair@286: public String getValue() { ohair@286: return value; ohair@286: } ohair@286: } ohair@286: ohair@286: private String string; // the string to be tokenized ohair@286: private boolean skipComments; // should comments be skipped ? ohair@286: private String delimiters; // delimiter string ohair@286: private int currentPos; // current parse position ohair@286: private int maxPos; // string length ohair@286: private int nextPos; // track start of next Token for next() ohair@286: private int peekPos; // track start of next Token for peek() ohair@286: ohair@286: /** ohair@286: * RFC822 specials ohair@286: */ ohair@286: private final static String RFC822 = "()<>@,;:\\\"\t .[]"; ohair@286: ohair@286: /** ohair@286: * MIME specials ohair@286: */ ohair@286: final static String MIME = "()<>@,;:\\\"\t []/?="; ohair@286: ohair@286: // The EOF Token ohair@286: private final static Token EOFToken = new Token(Token.EOF, null); ohair@286: ohair@286: /** ohair@286: * Constructor that takes a rfc822 style header. ohair@286: * ohair@286: * @param header The rfc822 header to be tokenized ohair@286: * @param delimiters Set of delimiter characters ohair@286: * to be used to delimit ATOMS. These ohair@286: * are usually RFC822 or ohair@286: * MIME ohair@286: * @param skipComments If true, comments are skipped and ohair@286: * not returned as tokens ohair@286: */ ohair@286: HeaderTokenizer(String header, String delimiters, ohair@286: boolean skipComments) { ohair@286: string = (header == null) ? "" : header; // paranoia ?! ohair@286: this.skipComments = skipComments; ohair@286: this.delimiters = delimiters; ohair@286: currentPos = nextPos = peekPos = 0; ohair@286: maxPos = string.length(); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Constructor. Comments are ignored and not returned as tokens ohair@286: * ohair@286: * @param header The header that is tokenized ohair@286: * @param delimiters The delimiters to be used ohair@286: */ ohair@286: HeaderTokenizer(String header, String delimiters) { ohair@286: this(header, delimiters, true); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Constructor. The RFC822 defined delimiters - RFC822 - are ohair@286: * used to delimit ATOMS. Also comments are skipped and not ohair@286: * returned as tokens ohair@286: */ ohair@286: HeaderTokenizer(String header) { ohair@286: this(header, RFC822); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Parses the next token from this String.

ohair@286: * ohair@286: * Clients sit in a loop calling next() to parse successive ohair@286: * tokens until an EOF Token is returned. ohair@286: * ohair@286: * @return the next Token ohair@286: * @exception WebServiceException if the parse fails ohair@286: */ ohair@286: Token next() throws WebServiceException { ohair@286: Token tk; ohair@286: ohair@286: currentPos = nextPos; // setup currentPos ohair@286: tk = getNext(); ohair@286: nextPos = peekPos = currentPos; // update currentPos and peekPos ohair@286: return tk; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Peek at the next token, without actually removing the token ohair@286: * from the parse stream. Invoking this method multiple times ohair@286: * will return successive tokens, until next() is ohair@286: * called.

ohair@286: * ohair@286: * @return the next Token ohair@286: * @exception WebServiceException if the parse fails ohair@286: */ ohair@286: Token peek() throws WebServiceException { ohair@286: Token tk; ohair@286: ohair@286: currentPos = peekPos; // setup currentPos ohair@286: tk = getNext(); ohair@286: peekPos = currentPos; // update peekPos ohair@286: return tk; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Return the rest of the Header. ohair@286: * ohair@286: * @return String rest of header. null is returned if we are ohair@286: * already at end of header ohair@286: */ ohair@286: String getRemainder() { ohair@286: return string.substring(nextPos); ohair@286: } ohair@286: ohair@286: /* ohair@286: * Return the next token starting from 'currentPos'. After the ohair@286: * parse, 'currentPos' is updated to point to the start of the ohair@286: * next token. ohair@286: */ ohair@286: private Token getNext() throws WebServiceException { ohair@286: // If we're already at end of string, return EOF ohair@286: if (currentPos >= maxPos) ohair@286: return EOFToken; ohair@286: ohair@286: // Skip white-space, position currentPos beyond the space ohair@286: if (skipWhiteSpace() == Token.EOF) ohair@286: return EOFToken; ohair@286: ohair@286: char c; ohair@286: int start; ohair@286: boolean filter = false; ohair@286: ohair@286: c = string.charAt(currentPos); ohair@286: ohair@286: // Check or Skip comments and position currentPos ohair@286: // beyond the comment ohair@286: while (c == '(') { ohair@286: // Parsing comment .. ohair@286: int nesting; ohair@286: for (start = ++currentPos, nesting = 1; ohair@286: nesting > 0 && currentPos < maxPos; ohair@286: currentPos++) { ohair@286: c = string.charAt(currentPos); ohair@286: if (c == '\\') { // Escape sequence ohair@286: currentPos++; // skip the escaped character ohair@286: filter = true; ohair@286: } else if (c == '\r') ohair@286: filter = true; ohair@286: else if (c == '(') ohair@286: nesting++; ohair@286: else if (c == ')') ohair@286: nesting--; ohair@286: } ohair@286: if (nesting != 0) ohair@286: throw new WebServiceException("Unbalanced comments"); ohair@286: ohair@286: if (!skipComments) { ohair@286: // Return the comment, if we are asked to. ohair@286: // Note that the comment start & end markers are ignored. ohair@286: String s; ohair@286: if (filter) // need to go thru the token again. ohair@286: s = filterToken(string, start, currentPos-1); ohair@286: else ohair@286: s = string.substring(start,currentPos-1); ohair@286: ohair@286: return new Token(Token.COMMENT, s); ohair@286: } ohair@286: ohair@286: // Skip any whitespace after the comment. ohair@286: if (skipWhiteSpace() == Token.EOF) ohair@286: return EOFToken; ohair@286: c = string.charAt(currentPos); ohair@286: } ohair@286: ohair@286: // Check for quoted-string and position currentPos ohair@286: // beyond the terminating quote ohair@286: if (c == '"') { ohair@286: for (start = ++currentPos; currentPos < maxPos; currentPos++) { ohair@286: c = string.charAt(currentPos); ohair@286: if (c == '\\') { // Escape sequence ohair@286: currentPos++; ohair@286: filter = true; ohair@286: } else if (c == '\r') ohair@286: filter = true; ohair@286: else if (c == '"') { ohair@286: currentPos++; ohair@286: String s; ohair@286: ohair@286: if (filter) ohair@286: s = filterToken(string, start, currentPos-1); ohair@286: else ohair@286: s = string.substring(start,currentPos-1); ohair@286: ohair@286: return new Token(Token.QUOTEDSTRING, s); ohair@286: } ohair@286: } ohair@286: throw new WebServiceException("Unbalanced quoted string"); ohair@286: } ohair@286: ohair@286: // Check for SPECIAL or CTL ohair@286: if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { ohair@286: currentPos++; // re-position currentPos ohair@286: char ch[] = new char[1]; ohair@286: ch[0] = c; ohair@286: return new Token((int)c, new String(ch)); ohair@286: } ohair@286: ohair@286: // Check for ATOM ohair@286: for (start = currentPos; currentPos < maxPos; currentPos++) { ohair@286: c = string.charAt(currentPos); ohair@286: // ATOM is delimited by either SPACE, CTL, "(", <"> ohair@286: // or the specified SPECIALS ohair@286: if (c < 040 || c >= 0177 || c == '(' || c == ' ' || ohair@286: c == '"' || delimiters.indexOf(c) >= 0) ohair@286: break; ohair@286: } ohair@286: return new Token(Token.ATOM, string.substring(start, currentPos)); ohair@286: } ohair@286: ohair@286: // Skip SPACE, HT, CR and NL ohair@286: private int skipWhiteSpace() { ohair@286: char c; ohair@286: for (; currentPos < maxPos; currentPos++) ohair@286: if (((c = string.charAt(currentPos)) != ' ') && ohair@286: (c != '\t') && (c != '\r') && (c != '\n')) ohair@286: return currentPos; ohair@286: return Token.EOF; ohair@286: } ohair@286: ohair@286: /* Process escape sequences and embedded LWSPs from a comment or ohair@286: * quoted string. ohair@286: */ ohair@286: private static String filterToken(String s, int start, int end) { ohair@286: StringBuffer sb = new StringBuffer(); ohair@286: char c; ohair@286: boolean gotEscape = false; ohair@286: boolean gotCR = false; ohair@286: ohair@286: for (int i = start; i < end; i++) { ohair@286: c = s.charAt(i); ohair@286: if (c == '\n' && gotCR) { ohair@286: // This LF is part of an unescaped ohair@286: // CRLF sequence (i.e, LWSP). Skip it. ohair@286: gotCR = false; ohair@286: continue; ohair@286: } ohair@286: ohair@286: gotCR = false; ohair@286: if (!gotEscape) { ohair@286: // Previous character was NOT '\' ohair@286: if (c == '\\') // skip this character ohair@286: gotEscape = true; ohair@286: else if (c == '\r') // skip this character ohair@286: gotCR = true; ohair@286: else // append this character ohair@286: sb.append(c); ohair@286: } else { ohair@286: // Previous character was '\'. So no need to ohair@286: // bother with any special processing, just ohair@286: // append this character ohair@286: sb.append(c); ohair@286: gotEscape = false; ohair@286: } ohair@286: } ohair@286: return sb.toString(); ohair@286: } ohair@286: }