Wed, 27 Apr 2016 01:27:09 +0800
Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.ws.encoding; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import javax.xml.ws.WebServiceException; |
aoqi@0 | 29 | |
aoqi@0 | 30 | /** |
aoqi@0 | 31 | * This class tokenizes RFC822 and MIME headers into the basic |
aoqi@0 | 32 | * symbols specified by RFC822 and MIME. <p> |
aoqi@0 | 33 | * |
aoqi@0 | 34 | * This class handles folded headers (ie headers with embedded |
aoqi@0 | 35 | * CRLF SPACE sequences). The folds are removed in the returned |
aoqi@0 | 36 | * tokens. |
aoqi@0 | 37 | * |
aoqi@0 | 38 | * @version 1.9, 02/03/27 |
aoqi@0 | 39 | * @author John Mani |
aoqi@0 | 40 | */ |
aoqi@0 | 41 | |
aoqi@0 | 42 | class HeaderTokenizer { |
aoqi@0 | 43 | |
aoqi@0 | 44 | /** |
aoqi@0 | 45 | * The Token class represents tokens returned by the |
aoqi@0 | 46 | * HeaderTokenizer. |
aoqi@0 | 47 | */ |
aoqi@0 | 48 | static class Token { |
aoqi@0 | 49 | |
aoqi@0 | 50 | private int type; |
aoqi@0 | 51 | private String value; |
aoqi@0 | 52 | |
aoqi@0 | 53 | /** |
aoqi@0 | 54 | * Token type indicating an ATOM. |
aoqi@0 | 55 | */ |
aoqi@0 | 56 | public static final int ATOM = -1; |
aoqi@0 | 57 | |
aoqi@0 | 58 | /** |
aoqi@0 | 59 | * Token type indicating a quoted string. The value |
aoqi@0 | 60 | * field contains the string without the quotes. |
aoqi@0 | 61 | */ |
aoqi@0 | 62 | public static final int QUOTEDSTRING = -2; |
aoqi@0 | 63 | |
aoqi@0 | 64 | /** |
aoqi@0 | 65 | * Token type indicating a comment. The value field |
aoqi@0 | 66 | * contains the comment string without the comment |
aoqi@0 | 67 | * start and end symbols. |
aoqi@0 | 68 | */ |
aoqi@0 | 69 | public static final int COMMENT = -3; |
aoqi@0 | 70 | |
aoqi@0 | 71 | /** |
aoqi@0 | 72 | * Token type indicating end of input. |
aoqi@0 | 73 | */ |
aoqi@0 | 74 | public static final int EOF = -4; |
aoqi@0 | 75 | |
aoqi@0 | 76 | /** |
aoqi@0 | 77 | * Constructor. |
aoqi@0 | 78 | * @param type Token type |
aoqi@0 | 79 | * @param value Token value |
aoqi@0 | 80 | */ |
aoqi@0 | 81 | public Token(int type, String value) { |
aoqi@0 | 82 | this.type = type; |
aoqi@0 | 83 | this.value = value; |
aoqi@0 | 84 | } |
aoqi@0 | 85 | |
aoqi@0 | 86 | /** |
aoqi@0 | 87 | * Return the type of the token. If the token represents a |
aoqi@0 | 88 | * delimiter or a control character, the type is that character |
aoqi@0 | 89 | * itself, converted to an integer. Otherwise, it's value is |
aoqi@0 | 90 | * one of the following: |
aoqi@0 | 91 | * <ul> |
aoqi@0 | 92 | * <li><code>ATOM</code> A sequence of ASCII characters |
aoqi@0 | 93 | * delimited by either SPACE, CTL, "(", <"> or the |
aoqi@0 | 94 | * specified SPECIALS |
aoqi@0 | 95 | * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters |
aoqi@0 | 96 | * within quotes |
aoqi@0 | 97 | * <li><code>COMMENT</code> A sequence of ASCII characters |
aoqi@0 | 98 | * within "(" and ")". |
aoqi@0 | 99 | * <li><code>EOF</code> End of header |
aoqi@0 | 100 | * </ul> |
aoqi@0 | 101 | */ |
aoqi@0 | 102 | public int getType() { |
aoqi@0 | 103 | return type; |
aoqi@0 | 104 | } |
aoqi@0 | 105 | |
aoqi@0 | 106 | /** |
aoqi@0 | 107 | * Returns the value of the token just read. When the current |
aoqi@0 | 108 | * token is a quoted string, this field contains the body of the |
aoqi@0 | 109 | * string, without the quotes. When the current token is a comment, |
aoqi@0 | 110 | * this field contains the body of the comment. |
aoqi@0 | 111 | * |
aoqi@0 | 112 | * @return token value |
aoqi@0 | 113 | */ |
aoqi@0 | 114 | public String getValue() { |
aoqi@0 | 115 | return value; |
aoqi@0 | 116 | } |
aoqi@0 | 117 | } |
aoqi@0 | 118 | |
aoqi@0 | 119 | private String string; // the string to be tokenized |
aoqi@0 | 120 | private boolean skipComments; // should comments be skipped ? |
aoqi@0 | 121 | private String delimiters; // delimiter string |
aoqi@0 | 122 | private int currentPos; // current parse position |
aoqi@0 | 123 | private int maxPos; // string length |
aoqi@0 | 124 | private int nextPos; // track start of next Token for next() |
aoqi@0 | 125 | private int peekPos; // track start of next Token for peek() |
aoqi@0 | 126 | |
aoqi@0 | 127 | /** |
aoqi@0 | 128 | * RFC822 specials |
aoqi@0 | 129 | */ |
aoqi@0 | 130 | private final static String RFC822 = "()<>@,;:\\\"\t .[]"; |
aoqi@0 | 131 | |
aoqi@0 | 132 | /** |
aoqi@0 | 133 | * MIME specials |
aoqi@0 | 134 | */ |
aoqi@0 | 135 | final static String MIME = "()<>@,;:\\\"\t []/?="; |
aoqi@0 | 136 | |
aoqi@0 | 137 | // The EOF Token |
aoqi@0 | 138 | private final static Token EOFToken = new Token(Token.EOF, null); |
aoqi@0 | 139 | |
aoqi@0 | 140 | /** |
aoqi@0 | 141 | * Constructor that takes a rfc822 style header. |
aoqi@0 | 142 | * |
aoqi@0 | 143 | * @param header The rfc822 header to be tokenized |
aoqi@0 | 144 | * @param delimiters Set of delimiter characters |
aoqi@0 | 145 | * to be used to delimit ATOMS. These |
aoqi@0 | 146 | * are usually <code>RFC822</code> or |
aoqi@0 | 147 | * <code>MIME</code> |
aoqi@0 | 148 | * @param skipComments If true, comments are skipped and |
aoqi@0 | 149 | * not returned as tokens |
aoqi@0 | 150 | */ |
aoqi@0 | 151 | HeaderTokenizer(String header, String delimiters, |
aoqi@0 | 152 | boolean skipComments) { |
aoqi@0 | 153 | string = (header == null) ? "" : header; // paranoia ?! |
aoqi@0 | 154 | this.skipComments = skipComments; |
aoqi@0 | 155 | this.delimiters = delimiters; |
aoqi@0 | 156 | currentPos = nextPos = peekPos = 0; |
aoqi@0 | 157 | maxPos = string.length(); |
aoqi@0 | 158 | } |
aoqi@0 | 159 | |
aoqi@0 | 160 | /** |
aoqi@0 | 161 | * Constructor. Comments are ignored and not returned as tokens |
aoqi@0 | 162 | * |
aoqi@0 | 163 | * @param header The header that is tokenized |
aoqi@0 | 164 | * @param delimiters The delimiters to be used |
aoqi@0 | 165 | */ |
aoqi@0 | 166 | HeaderTokenizer(String header, String delimiters) { |
aoqi@0 | 167 | this(header, delimiters, true); |
aoqi@0 | 168 | } |
aoqi@0 | 169 | |
aoqi@0 | 170 | /** |
aoqi@0 | 171 | * Constructor. The RFC822 defined delimiters - RFC822 - are |
aoqi@0 | 172 | * used to delimit ATOMS. Also comments are skipped and not |
aoqi@0 | 173 | * returned as tokens |
aoqi@0 | 174 | */ |
aoqi@0 | 175 | HeaderTokenizer(String header) { |
aoqi@0 | 176 | this(header, RFC822); |
aoqi@0 | 177 | } |
aoqi@0 | 178 | |
aoqi@0 | 179 | /** |
aoqi@0 | 180 | * Parses the next token from this String. <p> |
aoqi@0 | 181 | * |
aoqi@0 | 182 | * Clients sit in a loop calling next() to parse successive |
aoqi@0 | 183 | * tokens until an EOF Token is returned. |
aoqi@0 | 184 | * |
aoqi@0 | 185 | * @return the next Token |
aoqi@0 | 186 | * @exception WebServiceException if the parse fails |
aoqi@0 | 187 | */ |
aoqi@0 | 188 | Token next() throws WebServiceException { |
aoqi@0 | 189 | Token tk; |
aoqi@0 | 190 | |
aoqi@0 | 191 | currentPos = nextPos; // setup currentPos |
aoqi@0 | 192 | tk = getNext(); |
aoqi@0 | 193 | nextPos = peekPos = currentPos; // update currentPos and peekPos |
aoqi@0 | 194 | return tk; |
aoqi@0 | 195 | } |
aoqi@0 | 196 | |
aoqi@0 | 197 | /** |
aoqi@0 | 198 | * Peek at the next token, without actually removing the token |
aoqi@0 | 199 | * from the parse stream. Invoking this method multiple times |
aoqi@0 | 200 | * will return successive tokens, until <code>next()</code> is |
aoqi@0 | 201 | * called. <p> |
aoqi@0 | 202 | * |
aoqi@0 | 203 | * @return the next Token |
aoqi@0 | 204 | * @exception WebServiceException if the parse fails |
aoqi@0 | 205 | */ |
aoqi@0 | 206 | Token peek() throws WebServiceException { |
aoqi@0 | 207 | Token tk; |
aoqi@0 | 208 | |
aoqi@0 | 209 | currentPos = peekPos; // setup currentPos |
aoqi@0 | 210 | tk = getNext(); |
aoqi@0 | 211 | peekPos = currentPos; // update peekPos |
aoqi@0 | 212 | return tk; |
aoqi@0 | 213 | } |
aoqi@0 | 214 | |
aoqi@0 | 215 | /** |
aoqi@0 | 216 | * Return the rest of the Header. |
aoqi@0 | 217 | * |
aoqi@0 | 218 | * @return String rest of header. null is returned if we are |
aoqi@0 | 219 | * already at end of header |
aoqi@0 | 220 | */ |
aoqi@0 | 221 | String getRemainder() { |
aoqi@0 | 222 | return string.substring(nextPos); |
aoqi@0 | 223 | } |
aoqi@0 | 224 | |
aoqi@0 | 225 | /* |
aoqi@0 | 226 | * Return the next token starting from 'currentPos'. After the |
aoqi@0 | 227 | * parse, 'currentPos' is updated to point to the start of the |
aoqi@0 | 228 | * next token. |
aoqi@0 | 229 | */ |
aoqi@0 | 230 | private Token getNext() throws WebServiceException { |
aoqi@0 | 231 | // If we're already at end of string, return EOF |
aoqi@0 | 232 | if (currentPos >= maxPos) |
aoqi@0 | 233 | return EOFToken; |
aoqi@0 | 234 | |
aoqi@0 | 235 | // Skip white-space, position currentPos beyond the space |
aoqi@0 | 236 | if (skipWhiteSpace() == Token.EOF) |
aoqi@0 | 237 | return EOFToken; |
aoqi@0 | 238 | |
aoqi@0 | 239 | char c; |
aoqi@0 | 240 | int start; |
aoqi@0 | 241 | boolean filter = false; |
aoqi@0 | 242 | |
aoqi@0 | 243 | c = string.charAt(currentPos); |
aoqi@0 | 244 | |
aoqi@0 | 245 | // Check or Skip comments and position currentPos |
aoqi@0 | 246 | // beyond the comment |
aoqi@0 | 247 | while (c == '(') { |
aoqi@0 | 248 | // Parsing comment .. |
aoqi@0 | 249 | int nesting; |
aoqi@0 | 250 | for (start = ++currentPos, nesting = 1; |
aoqi@0 | 251 | nesting > 0 && currentPos < maxPos; |
aoqi@0 | 252 | currentPos++) { |
aoqi@0 | 253 | c = string.charAt(currentPos); |
aoqi@0 | 254 | if (c == '\\') { // Escape sequence |
aoqi@0 | 255 | currentPos++; // skip the escaped character |
aoqi@0 | 256 | filter = true; |
aoqi@0 | 257 | } else if (c == '\r') |
aoqi@0 | 258 | filter = true; |
aoqi@0 | 259 | else if (c == '(') |
aoqi@0 | 260 | nesting++; |
aoqi@0 | 261 | else if (c == ')') |
aoqi@0 | 262 | nesting--; |
aoqi@0 | 263 | } |
aoqi@0 | 264 | if (nesting != 0) |
aoqi@0 | 265 | throw new WebServiceException("Unbalanced comments"); |
aoqi@0 | 266 | |
aoqi@0 | 267 | if (!skipComments) { |
aoqi@0 | 268 | // Return the comment, if we are asked to. |
aoqi@0 | 269 | // Note that the comment start & end markers are ignored. |
aoqi@0 | 270 | String s; |
aoqi@0 | 271 | if (filter) // need to go thru the token again. |
aoqi@0 | 272 | s = filterToken(string, start, currentPos-1); |
aoqi@0 | 273 | else |
aoqi@0 | 274 | s = string.substring(start,currentPos-1); |
aoqi@0 | 275 | |
aoqi@0 | 276 | return new Token(Token.COMMENT, s); |
aoqi@0 | 277 | } |
aoqi@0 | 278 | |
aoqi@0 | 279 | // Skip any whitespace after the comment. |
aoqi@0 | 280 | if (skipWhiteSpace() == Token.EOF) |
aoqi@0 | 281 | return EOFToken; |
aoqi@0 | 282 | c = string.charAt(currentPos); |
aoqi@0 | 283 | } |
aoqi@0 | 284 | |
aoqi@0 | 285 | // Check for quoted-string and position currentPos |
aoqi@0 | 286 | // beyond the terminating quote |
aoqi@0 | 287 | if (c == '"') { |
aoqi@0 | 288 | for (start = ++currentPos; currentPos < maxPos; currentPos++) { |
aoqi@0 | 289 | c = string.charAt(currentPos); |
aoqi@0 | 290 | if (c == '\\') { // Escape sequence |
aoqi@0 | 291 | currentPos++; |
aoqi@0 | 292 | filter = true; |
aoqi@0 | 293 | } else if (c == '\r') |
aoqi@0 | 294 | filter = true; |
aoqi@0 | 295 | else if (c == '"') { |
aoqi@0 | 296 | currentPos++; |
aoqi@0 | 297 | String s; |
aoqi@0 | 298 | |
aoqi@0 | 299 | if (filter) |
aoqi@0 | 300 | s = filterToken(string, start, currentPos-1); |
aoqi@0 | 301 | else |
aoqi@0 | 302 | s = string.substring(start,currentPos-1); |
aoqi@0 | 303 | |
aoqi@0 | 304 | return new Token(Token.QUOTEDSTRING, s); |
aoqi@0 | 305 | } |
aoqi@0 | 306 | } |
aoqi@0 | 307 | throw new WebServiceException("Unbalanced quoted string"); |
aoqi@0 | 308 | } |
aoqi@0 | 309 | |
aoqi@0 | 310 | // Check for SPECIAL or CTL |
aoqi@0 | 311 | if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { |
aoqi@0 | 312 | currentPos++; // re-position currentPos |
aoqi@0 | 313 | char ch[] = new char[1]; |
aoqi@0 | 314 | ch[0] = c; |
aoqi@0 | 315 | return new Token((int)c, new String(ch)); |
aoqi@0 | 316 | } |
aoqi@0 | 317 | |
aoqi@0 | 318 | // Check for ATOM |
aoqi@0 | 319 | for (start = currentPos; currentPos < maxPos; currentPos++) { |
aoqi@0 | 320 | c = string.charAt(currentPos); |
aoqi@0 | 321 | // ATOM is delimited by either SPACE, CTL, "(", <"> |
aoqi@0 | 322 | // or the specified SPECIALS |
aoqi@0 | 323 | if (c < 040 || c >= 0177 || c == '(' || c == ' ' || |
aoqi@0 | 324 | c == '"' || delimiters.indexOf(c) >= 0) |
aoqi@0 | 325 | break; |
aoqi@0 | 326 | } |
aoqi@0 | 327 | return new Token(Token.ATOM, string.substring(start, currentPos)); |
aoqi@0 | 328 | } |
aoqi@0 | 329 | |
aoqi@0 | 330 | // Skip SPACE, HT, CR and NL |
aoqi@0 | 331 | private int skipWhiteSpace() { |
aoqi@0 | 332 | char c; |
aoqi@0 | 333 | for (; currentPos < maxPos; currentPos++) |
aoqi@0 | 334 | if (((c = string.charAt(currentPos)) != ' ') && |
aoqi@0 | 335 | (c != '\t') && (c != '\r') && (c != '\n')) |
aoqi@0 | 336 | return currentPos; |
aoqi@0 | 337 | return Token.EOF; |
aoqi@0 | 338 | } |
aoqi@0 | 339 | |
aoqi@0 | 340 | /* Process escape sequences and embedded LWSPs from a comment or |
aoqi@0 | 341 | * quoted string. |
aoqi@0 | 342 | */ |
aoqi@0 | 343 | private static String filterToken(String s, int start, int end) { |
aoqi@0 | 344 | StringBuffer sb = new StringBuffer(); |
aoqi@0 | 345 | char c; |
aoqi@0 | 346 | boolean gotEscape = false; |
aoqi@0 | 347 | boolean gotCR = false; |
aoqi@0 | 348 | |
aoqi@0 | 349 | for (int i = start; i < end; i++) { |
aoqi@0 | 350 | c = s.charAt(i); |
aoqi@0 | 351 | if (c == '\n' && gotCR) { |
aoqi@0 | 352 | // This LF is part of an unescaped |
aoqi@0 | 353 | // CRLF sequence (i.e, LWSP). Skip it. |
aoqi@0 | 354 | gotCR = false; |
aoqi@0 | 355 | continue; |
aoqi@0 | 356 | } |
aoqi@0 | 357 | |
aoqi@0 | 358 | gotCR = false; |
aoqi@0 | 359 | if (!gotEscape) { |
aoqi@0 | 360 | // Previous character was NOT '\' |
aoqi@0 | 361 | if (c == '\\') // skip this character |
aoqi@0 | 362 | gotEscape = true; |
aoqi@0 | 363 | else if (c == '\r') // skip this character |
aoqi@0 | 364 | gotCR = true; |
aoqi@0 | 365 | else // append this character |
aoqi@0 | 366 | sb.append(c); |
aoqi@0 | 367 | } else { |
aoqi@0 | 368 | // Previous character was '\'. So no need to |
aoqi@0 | 369 | // bother with any special processing, just |
aoqi@0 | 370 | // append this character |
aoqi@0 | 371 | sb.append(c); |
aoqi@0 | 372 | gotEscape = false; |
aoqi@0 | 373 | } |
aoqi@0 | 374 | } |
aoqi@0 | 375 | return sb.toString(); |
aoqi@0 | 376 | } |
aoqi@0 | 377 | } |