1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java Wed Apr 27 01:27:09 2016 +0800 1.3 @@ -0,0 +1,377 @@ 1.4 +/* 1.5 + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Oracle designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Oracle in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.25 + * or visit www.oracle.com if you need additional information or have any 1.26 + * questions. 1.27 + */ 1.28 + 1.29 +package com.sun.xml.internal.ws.encoding; 1.30 + 1.31 +import javax.xml.ws.WebServiceException; 1.32 + 1.33 +/** 1.34 + * This class tokenizes RFC822 and MIME headers into the basic 1.35 + * symbols specified by RFC822 and MIME. <p> 1.36 + * 1.37 + * This class handles folded headers (ie headers with embedded 1.38 + * CRLF SPACE sequences). The folds are removed in the returned 1.39 + * tokens. 1.40 + * 1.41 + * @version 1.9, 02/03/27 1.42 + * @author John Mani 1.43 + */ 1.44 + 1.45 +class HeaderTokenizer { 1.46 + 1.47 + /** 1.48 + * The Token class represents tokens returned by the 1.49 + * HeaderTokenizer. 1.50 + */ 1.51 + static class Token { 1.52 + 1.53 + private int type; 1.54 + private String value; 1.55 + 1.56 + /** 1.57 + * Token type indicating an ATOM. 1.58 + */ 1.59 + public static final int ATOM = -1; 1.60 + 1.61 + /** 1.62 + * Token type indicating a quoted string. The value 1.63 + * field contains the string without the quotes. 1.64 + */ 1.65 + public static final int QUOTEDSTRING = -2; 1.66 + 1.67 + /** 1.68 + * Token type indicating a comment. The value field 1.69 + * contains the comment string without the comment 1.70 + * start and end symbols. 1.71 + */ 1.72 + public static final int COMMENT = -3; 1.73 + 1.74 + /** 1.75 + * Token type indicating end of input. 1.76 + */ 1.77 + public static final int EOF = -4; 1.78 + 1.79 + /** 1.80 + * Constructor. 1.81 + * @param type Token type 1.82 + * @param value Token value 1.83 + */ 1.84 + public Token(int type, String value) { 1.85 + this.type = type; 1.86 + this.value = value; 1.87 + } 1.88 + 1.89 + /** 1.90 + * Return the type of the token. If the token represents a 1.91 + * delimiter or a control character, the type is that character 1.92 + * itself, converted to an integer. Otherwise, it's value is 1.93 + * one of the following: 1.94 + * <ul> 1.95 + * <li><code>ATOM</code> A sequence of ASCII characters 1.96 + * delimited by either SPACE, CTL, "(", <"> or the 1.97 + * specified SPECIALS 1.98 + * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters 1.99 + * within quotes 1.100 + * <li><code>COMMENT</code> A sequence of ASCII characters 1.101 + * within "(" and ")". 1.102 + * <li><code>EOF</code> End of header 1.103 + * </ul> 1.104 + */ 1.105 + public int getType() { 1.106 + return type; 1.107 + } 1.108 + 1.109 + /** 1.110 + * Returns the value of the token just read. When the current 1.111 + * token is a quoted string, this field contains the body of the 1.112 + * string, without the quotes. When the current token is a comment, 1.113 + * this field contains the body of the comment. 1.114 + * 1.115 + * @return token value 1.116 + */ 1.117 + public String getValue() { 1.118 + return value; 1.119 + } 1.120 + } 1.121 + 1.122 + private String string; // the string to be tokenized 1.123 + private boolean skipComments; // should comments be skipped ? 1.124 + private String delimiters; // delimiter string 1.125 + private int currentPos; // current parse position 1.126 + private int maxPos; // string length 1.127 + private int nextPos; // track start of next Token for next() 1.128 + private int peekPos; // track start of next Token for peek() 1.129 + 1.130 + /** 1.131 + * RFC822 specials 1.132 + */ 1.133 + private final static String RFC822 = "()<>@,;:\\\"\t .[]"; 1.134 + 1.135 + /** 1.136 + * MIME specials 1.137 + */ 1.138 + final static String MIME = "()<>@,;:\\\"\t []/?="; 1.139 + 1.140 + // The EOF Token 1.141 + private final static Token EOFToken = new Token(Token.EOF, null); 1.142 + 1.143 + /** 1.144 + * Constructor that takes a rfc822 style header. 1.145 + * 1.146 + * @param header The rfc822 header to be tokenized 1.147 + * @param delimiters Set of delimiter characters 1.148 + * to be used to delimit ATOMS. These 1.149 + * are usually <code>RFC822</code> or 1.150 + * <code>MIME</code> 1.151 + * @param skipComments If true, comments are skipped and 1.152 + * not returned as tokens 1.153 + */ 1.154 + HeaderTokenizer(String header, String delimiters, 1.155 + boolean skipComments) { 1.156 + string = (header == null) ? "" : header; // paranoia ?! 1.157 + this.skipComments = skipComments; 1.158 + this.delimiters = delimiters; 1.159 + currentPos = nextPos = peekPos = 0; 1.160 + maxPos = string.length(); 1.161 + } 1.162 + 1.163 + /** 1.164 + * Constructor. Comments are ignored and not returned as tokens 1.165 + * 1.166 + * @param header The header that is tokenized 1.167 + * @param delimiters The delimiters to be used 1.168 + */ 1.169 + HeaderTokenizer(String header, String delimiters) { 1.170 + this(header, delimiters, true); 1.171 + } 1.172 + 1.173 + /** 1.174 + * Constructor. The RFC822 defined delimiters - RFC822 - are 1.175 + * used to delimit ATOMS. Also comments are skipped and not 1.176 + * returned as tokens 1.177 + */ 1.178 + HeaderTokenizer(String header) { 1.179 + this(header, RFC822); 1.180 + } 1.181 + 1.182 + /** 1.183 + * Parses the next token from this String. <p> 1.184 + * 1.185 + * Clients sit in a loop calling next() to parse successive 1.186 + * tokens until an EOF Token is returned. 1.187 + * 1.188 + * @return the next Token 1.189 + * @exception WebServiceException if the parse fails 1.190 + */ 1.191 + Token next() throws WebServiceException { 1.192 + Token tk; 1.193 + 1.194 + currentPos = nextPos; // setup currentPos 1.195 + tk = getNext(); 1.196 + nextPos = peekPos = currentPos; // update currentPos and peekPos 1.197 + return tk; 1.198 + } 1.199 + 1.200 + /** 1.201 + * Peek at the next token, without actually removing the token 1.202 + * from the parse stream. Invoking this method multiple times 1.203 + * will return successive tokens, until <code>next()</code> is 1.204 + * called. <p> 1.205 + * 1.206 + * @return the next Token 1.207 + * @exception WebServiceException if the parse fails 1.208 + */ 1.209 + Token peek() throws WebServiceException { 1.210 + Token tk; 1.211 + 1.212 + currentPos = peekPos; // setup currentPos 1.213 + tk = getNext(); 1.214 + peekPos = currentPos; // update peekPos 1.215 + return tk; 1.216 + } 1.217 + 1.218 + /** 1.219 + * Return the rest of the Header. 1.220 + * 1.221 + * @return String rest of header. null is returned if we are 1.222 + * already at end of header 1.223 + */ 1.224 + String getRemainder() { 1.225 + return string.substring(nextPos); 1.226 + } 1.227 + 1.228 + /* 1.229 + * Return the next token starting from 'currentPos'. After the 1.230 + * parse, 'currentPos' is updated to point to the start of the 1.231 + * next token. 1.232 + */ 1.233 + private Token getNext() throws WebServiceException { 1.234 + // If we're already at end of string, return EOF 1.235 + if (currentPos >= maxPos) 1.236 + return EOFToken; 1.237 + 1.238 + // Skip white-space, position currentPos beyond the space 1.239 + if (skipWhiteSpace() == Token.EOF) 1.240 + return EOFToken; 1.241 + 1.242 + char c; 1.243 + int start; 1.244 + boolean filter = false; 1.245 + 1.246 + c = string.charAt(currentPos); 1.247 + 1.248 + // Check or Skip comments and position currentPos 1.249 + // beyond the comment 1.250 + while (c == '(') { 1.251 + // Parsing comment .. 1.252 + int nesting; 1.253 + for (start = ++currentPos, nesting = 1; 1.254 + nesting > 0 && currentPos < maxPos; 1.255 + currentPos++) { 1.256 + c = string.charAt(currentPos); 1.257 + if (c == '\\') { // Escape sequence 1.258 + currentPos++; // skip the escaped character 1.259 + filter = true; 1.260 + } else if (c == '\r') 1.261 + filter = true; 1.262 + else if (c == '(') 1.263 + nesting++; 1.264 + else if (c == ')') 1.265 + nesting--; 1.266 + } 1.267 + if (nesting != 0) 1.268 + throw new WebServiceException("Unbalanced comments"); 1.269 + 1.270 + if (!skipComments) { 1.271 + // Return the comment, if we are asked to. 1.272 + // Note that the comment start & end markers are ignored. 1.273 + String s; 1.274 + if (filter) // need to go thru the token again. 1.275 + s = filterToken(string, start, currentPos-1); 1.276 + else 1.277 + s = string.substring(start,currentPos-1); 1.278 + 1.279 + return new Token(Token.COMMENT, s); 1.280 + } 1.281 + 1.282 + // Skip any whitespace after the comment. 1.283 + if (skipWhiteSpace() == Token.EOF) 1.284 + return EOFToken; 1.285 + c = string.charAt(currentPos); 1.286 + } 1.287 + 1.288 + // Check for quoted-string and position currentPos 1.289 + // beyond the terminating quote 1.290 + if (c == '"') { 1.291 + for (start = ++currentPos; currentPos < maxPos; currentPos++) { 1.292 + c = string.charAt(currentPos); 1.293 + if (c == '\\') { // Escape sequence 1.294 + currentPos++; 1.295 + filter = true; 1.296 + } else if (c == '\r') 1.297 + filter = true; 1.298 + else if (c == '"') { 1.299 + currentPos++; 1.300 + String s; 1.301 + 1.302 + if (filter) 1.303 + s = filterToken(string, start, currentPos-1); 1.304 + else 1.305 + s = string.substring(start,currentPos-1); 1.306 + 1.307 + return new Token(Token.QUOTEDSTRING, s); 1.308 + } 1.309 + } 1.310 + throw new WebServiceException("Unbalanced quoted string"); 1.311 + } 1.312 + 1.313 + // Check for SPECIAL or CTL 1.314 + if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { 1.315 + currentPos++; // re-position currentPos 1.316 + char ch[] = new char[1]; 1.317 + ch[0] = c; 1.318 + return new Token((int)c, new String(ch)); 1.319 + } 1.320 + 1.321 + // Check for ATOM 1.322 + for (start = currentPos; currentPos < maxPos; currentPos++) { 1.323 + c = string.charAt(currentPos); 1.324 + // ATOM is delimited by either SPACE, CTL, "(", <"> 1.325 + // or the specified SPECIALS 1.326 + if (c < 040 || c >= 0177 || c == '(' || c == ' ' || 1.327 + c == '"' || delimiters.indexOf(c) >= 0) 1.328 + break; 1.329 + } 1.330 + return new Token(Token.ATOM, string.substring(start, currentPos)); 1.331 + } 1.332 + 1.333 + // Skip SPACE, HT, CR and NL 1.334 + private int skipWhiteSpace() { 1.335 + char c; 1.336 + for (; currentPos < maxPos; currentPos++) 1.337 + if (((c = string.charAt(currentPos)) != ' ') && 1.338 + (c != '\t') && (c != '\r') && (c != '\n')) 1.339 + return currentPos; 1.340 + return Token.EOF; 1.341 + } 1.342 + 1.343 + /* Process escape sequences and embedded LWSPs from a comment or 1.344 + * quoted string. 1.345 + */ 1.346 + private static String filterToken(String s, int start, int end) { 1.347 + StringBuffer sb = new StringBuffer(); 1.348 + char c; 1.349 + boolean gotEscape = false; 1.350 + boolean gotCR = false; 1.351 + 1.352 + for (int i = start; i < end; i++) { 1.353 + c = s.charAt(i); 1.354 + if (c == '\n' && gotCR) { 1.355 + // This LF is part of an unescaped 1.356 + // CRLF sequence (i.e, LWSP). Skip it. 1.357 + gotCR = false; 1.358 + continue; 1.359 + } 1.360 + 1.361 + gotCR = false; 1.362 + if (!gotEscape) { 1.363 + // Previous character was NOT '\' 1.364 + if (c == '\\') // skip this character 1.365 + gotEscape = true; 1.366 + else if (c == '\r') // skip this character 1.367 + gotCR = true; 1.368 + else // append this character 1.369 + sb.append(c); 1.370 + } else { 1.371 + // Previous character was '\'. So no need to 1.372 + // bother with any special processing, just 1.373 + // append this character 1.374 + sb.append(c); 1.375 + gotEscape = false; 1.376 + } 1.377 + } 1.378 + return sb.toString(); 1.379 + } 1.380 +}