aoqi@0: /* aoqi@0: * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: */ aoqi@0: aoqi@0: package com.sun.xml.internal.ws.encoding; aoqi@0: aoqi@0: import javax.xml.ws.WebServiceException; aoqi@0: aoqi@0: /** aoqi@0: * This class tokenizes RFC822 and MIME headers into the basic aoqi@0: * symbols specified by RFC822 and MIME.

aoqi@0: * aoqi@0: * This class handles folded headers (ie headers with embedded aoqi@0: * CRLF SPACE sequences). The folds are removed in the returned aoqi@0: * tokens. aoqi@0: * aoqi@0: * @version 1.9, 02/03/27 aoqi@0: * @author John Mani aoqi@0: */ aoqi@0: aoqi@0: class HeaderTokenizer { aoqi@0: aoqi@0: /** aoqi@0: * The Token class represents tokens returned by the aoqi@0: * HeaderTokenizer. aoqi@0: */ aoqi@0: static class Token { aoqi@0: aoqi@0: private int type; aoqi@0: private String value; aoqi@0: aoqi@0: /** aoqi@0: * Token type indicating an ATOM. aoqi@0: */ aoqi@0: public static final int ATOM = -1; aoqi@0: aoqi@0: /** aoqi@0: * Token type indicating a quoted string. The value aoqi@0: * field contains the string without the quotes. aoqi@0: */ aoqi@0: public static final int QUOTEDSTRING = -2; aoqi@0: aoqi@0: /** aoqi@0: * Token type indicating a comment. The value field aoqi@0: * contains the comment string without the comment aoqi@0: * start and end symbols. aoqi@0: */ aoqi@0: public static final int COMMENT = -3; aoqi@0: aoqi@0: /** aoqi@0: * Token type indicating end of input. aoqi@0: */ aoqi@0: public static final int EOF = -4; aoqi@0: aoqi@0: /** aoqi@0: * Constructor. aoqi@0: * @param type Token type aoqi@0: * @param value Token value aoqi@0: */ aoqi@0: public Token(int type, String value) { aoqi@0: this.type = type; aoqi@0: this.value = value; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Return the type of the token. If the token represents a aoqi@0: * delimiter or a control character, the type is that character aoqi@0: * itself, converted to an integer. Otherwise, it's value is aoqi@0: * one of the following: aoqi@0: *

aoqi@0: */ aoqi@0: public int getType() { aoqi@0: return type; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Returns the value of the token just read. When the current aoqi@0: * token is a quoted string, this field contains the body of the aoqi@0: * string, without the quotes. When the current token is a comment, aoqi@0: * this field contains the body of the comment. aoqi@0: * aoqi@0: * @return token value aoqi@0: */ aoqi@0: public String getValue() { aoqi@0: return value; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: private String string; // the string to be tokenized aoqi@0: private boolean skipComments; // should comments be skipped ? aoqi@0: private String delimiters; // delimiter string aoqi@0: private int currentPos; // current parse position aoqi@0: private int maxPos; // string length aoqi@0: private int nextPos; // track start of next Token for next() aoqi@0: private int peekPos; // track start of next Token for peek() aoqi@0: aoqi@0: /** aoqi@0: * RFC822 specials aoqi@0: */ aoqi@0: private final static String RFC822 = "()<>@,;:\\\"\t .[]"; aoqi@0: aoqi@0: /** aoqi@0: * MIME specials aoqi@0: */ aoqi@0: final static String MIME = "()<>@,;:\\\"\t []/?="; aoqi@0: aoqi@0: // The EOF Token aoqi@0: private final static Token EOFToken = new Token(Token.EOF, null); aoqi@0: aoqi@0: /** aoqi@0: * Constructor that takes a rfc822 style header. aoqi@0: * aoqi@0: * @param header The rfc822 header to be tokenized aoqi@0: * @param delimiters Set of delimiter characters aoqi@0: * to be used to delimit ATOMS. These aoqi@0: * are usually RFC822 or aoqi@0: * MIME aoqi@0: * @param skipComments If true, comments are skipped and aoqi@0: * not returned as tokens aoqi@0: */ aoqi@0: HeaderTokenizer(String header, String delimiters, aoqi@0: boolean skipComments) { aoqi@0: string = (header == null) ? "" : header; // paranoia ?! aoqi@0: this.skipComments = skipComments; aoqi@0: this.delimiters = delimiters; aoqi@0: currentPos = nextPos = peekPos = 0; aoqi@0: maxPos = string.length(); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Constructor. Comments are ignored and not returned as tokens aoqi@0: * aoqi@0: * @param header The header that is tokenized aoqi@0: * @param delimiters The delimiters to be used aoqi@0: */ aoqi@0: HeaderTokenizer(String header, String delimiters) { aoqi@0: this(header, delimiters, true); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Constructor. The RFC822 defined delimiters - RFC822 - are aoqi@0: * used to delimit ATOMS. Also comments are skipped and not aoqi@0: * returned as tokens aoqi@0: */ aoqi@0: HeaderTokenizer(String header) { aoqi@0: this(header, RFC822); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Parses the next token from this String.

aoqi@0: * aoqi@0: * Clients sit in a loop calling next() to parse successive aoqi@0: * tokens until an EOF Token is returned. aoqi@0: * aoqi@0: * @return the next Token aoqi@0: * @exception WebServiceException if the parse fails aoqi@0: */ aoqi@0: Token next() throws WebServiceException { aoqi@0: Token tk; aoqi@0: aoqi@0: currentPos = nextPos; // setup currentPos aoqi@0: tk = getNext(); aoqi@0: nextPos = peekPos = currentPos; // update currentPos and peekPos aoqi@0: return tk; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Peek at the next token, without actually removing the token aoqi@0: * from the parse stream. Invoking this method multiple times aoqi@0: * will return successive tokens, until next() is aoqi@0: * called.

aoqi@0: * aoqi@0: * @return the next Token aoqi@0: * @exception WebServiceException if the parse fails aoqi@0: */ aoqi@0: Token peek() throws WebServiceException { aoqi@0: Token tk; aoqi@0: aoqi@0: currentPos = peekPos; // setup currentPos aoqi@0: tk = getNext(); aoqi@0: peekPos = currentPos; // update peekPos aoqi@0: return tk; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Return the rest of the Header. aoqi@0: * aoqi@0: * @return String rest of header. null is returned if we are aoqi@0: * already at end of header aoqi@0: */ aoqi@0: String getRemainder() { aoqi@0: return string.substring(nextPos); aoqi@0: } aoqi@0: aoqi@0: /* aoqi@0: * Return the next token starting from 'currentPos'. After the aoqi@0: * parse, 'currentPos' is updated to point to the start of the aoqi@0: * next token. aoqi@0: */ aoqi@0: private Token getNext() throws WebServiceException { aoqi@0: // If we're already at end of string, return EOF aoqi@0: if (currentPos >= maxPos) aoqi@0: return EOFToken; aoqi@0: aoqi@0: // Skip white-space, position currentPos beyond the space aoqi@0: if (skipWhiteSpace() == Token.EOF) aoqi@0: return EOFToken; aoqi@0: aoqi@0: char c; aoqi@0: int start; aoqi@0: boolean filter = false; aoqi@0: aoqi@0: c = string.charAt(currentPos); aoqi@0: aoqi@0: // Check or Skip comments and position currentPos aoqi@0: // beyond the comment aoqi@0: while (c == '(') { aoqi@0: // Parsing comment .. aoqi@0: int nesting; aoqi@0: for (start = ++currentPos, nesting = 1; aoqi@0: nesting > 0 && currentPos < maxPos; aoqi@0: currentPos++) { aoqi@0: c = string.charAt(currentPos); aoqi@0: if (c == '\\') { // Escape sequence aoqi@0: currentPos++; // skip the escaped character aoqi@0: filter = true; aoqi@0: } else if (c == '\r') aoqi@0: filter = true; aoqi@0: else if (c == '(') aoqi@0: nesting++; aoqi@0: else if (c == ')') aoqi@0: nesting--; aoqi@0: } aoqi@0: if (nesting != 0) aoqi@0: throw new WebServiceException("Unbalanced comments"); aoqi@0: aoqi@0: if (!skipComments) { aoqi@0: // Return the comment, if we are asked to. aoqi@0: // Note that the comment start & end markers are ignored. aoqi@0: String s; aoqi@0: if (filter) // need to go thru the token again. aoqi@0: s = filterToken(string, start, currentPos-1); aoqi@0: else aoqi@0: s = string.substring(start,currentPos-1); aoqi@0: aoqi@0: return new Token(Token.COMMENT, s); aoqi@0: } aoqi@0: aoqi@0: // Skip any whitespace after the comment. aoqi@0: if (skipWhiteSpace() == Token.EOF) aoqi@0: return EOFToken; aoqi@0: c = string.charAt(currentPos); aoqi@0: } aoqi@0: aoqi@0: // Check for quoted-string and position currentPos aoqi@0: // beyond the terminating quote aoqi@0: if (c == '"') { aoqi@0: for (start = ++currentPos; currentPos < maxPos; currentPos++) { aoqi@0: c = string.charAt(currentPos); aoqi@0: if (c == '\\') { // Escape sequence aoqi@0: currentPos++; aoqi@0: filter = true; aoqi@0: } else if (c == '\r') aoqi@0: filter = true; aoqi@0: else if (c == '"') { aoqi@0: currentPos++; aoqi@0: String s; aoqi@0: aoqi@0: if (filter) aoqi@0: s = filterToken(string, start, currentPos-1); aoqi@0: else aoqi@0: s = string.substring(start,currentPos-1); aoqi@0: aoqi@0: return new Token(Token.QUOTEDSTRING, s); aoqi@0: } aoqi@0: } aoqi@0: throw new WebServiceException("Unbalanced quoted string"); aoqi@0: } aoqi@0: aoqi@0: // Check for SPECIAL or CTL aoqi@0: if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { aoqi@0: currentPos++; // re-position currentPos aoqi@0: char ch[] = new char[1]; aoqi@0: ch[0] = c; aoqi@0: return new Token((int)c, new String(ch)); aoqi@0: } aoqi@0: aoqi@0: // Check for ATOM aoqi@0: for (start = currentPos; currentPos < maxPos; currentPos++) { aoqi@0: c = string.charAt(currentPos); aoqi@0: // ATOM is delimited by either SPACE, CTL, "(", <"> aoqi@0: // or the specified SPECIALS aoqi@0: if (c < 040 || c >= 0177 || c == '(' || c == ' ' || aoqi@0: c == '"' || delimiters.indexOf(c) >= 0) aoqi@0: break; aoqi@0: } aoqi@0: return new Token(Token.ATOM, string.substring(start, currentPos)); aoqi@0: } aoqi@0: aoqi@0: // Skip SPACE, HT, CR and NL aoqi@0: private int skipWhiteSpace() { aoqi@0: char c; aoqi@0: for (; currentPos < maxPos; currentPos++) aoqi@0: if (((c = string.charAt(currentPos)) != ' ') && aoqi@0: (c != '\t') && (c != '\r') && (c != '\n')) aoqi@0: return currentPos; aoqi@0: return Token.EOF; aoqi@0: } aoqi@0: aoqi@0: /* Process escape sequences and embedded LWSPs from a comment or aoqi@0: * quoted string. aoqi@0: */ aoqi@0: private static String filterToken(String s, int start, int end) { aoqi@0: StringBuffer sb = new StringBuffer(); aoqi@0: char c; aoqi@0: boolean gotEscape = false; aoqi@0: boolean gotCR = false; aoqi@0: aoqi@0: for (int i = start; i < end; i++) { aoqi@0: c = s.charAt(i); aoqi@0: if (c == '\n' && gotCR) { aoqi@0: // This LF is part of an unescaped aoqi@0: // CRLF sequence (i.e, LWSP). Skip it. aoqi@0: gotCR = false; aoqi@0: continue; aoqi@0: } aoqi@0: aoqi@0: gotCR = false; aoqi@0: if (!gotEscape) { aoqi@0: // Previous character was NOT '\' aoqi@0: if (c == '\\') // skip this character aoqi@0: gotEscape = true; aoqi@0: else if (c == '\r') // skip this character aoqi@0: gotCR = true; aoqi@0: else // append this character aoqi@0: sb.append(c); aoqi@0: } else { aoqi@0: // Previous character was '\'. So no need to aoqi@0: // bother with any special processing, just aoqi@0: // append this character aoqi@0: sb.append(c); aoqi@0: gotEscape = false; aoqi@0: } aoqi@0: } aoqi@0: return sb.toString(); aoqi@0: } aoqi@0: }