src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java

Fri, 14 Feb 2014 11:13:45 +0100

author
mkos
date
Fri, 14 Feb 2014 11:13:45 +0100
changeset 515
6cd506508147
parent 368
0989ad8c0860
child 637
9c07ef4934dd
permissions
-rw-r--r--

8026188: Enhance envelope factory
Summary: Avoiding caching data initialized via TCCL in static context; fix also reviewed by Alexander Fomin
Reviewed-by: ahgross, mgrebac, skoivu

ohair@286 1 /*
alanb@368 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
ohair@286 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
ohair@286 4 *
ohair@286 5 * This code is free software; you can redistribute it and/or modify it
ohair@286 6 * under the terms of the GNU General Public License version 2 only, as
ohair@286 7 * published by the Free Software Foundation. Oracle designates this
ohair@286 8 * particular file as subject to the "Classpath" exception as provided
ohair@286 9 * by Oracle in the LICENSE file that accompanied this code.
ohair@286 10 *
ohair@286 11 * This code is distributed in the hope that it will be useful, but WITHOUT
ohair@286 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
ohair@286 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
ohair@286 14 * version 2 for more details (a copy is included in the LICENSE file that
ohair@286 15 * accompanied this code).
ohair@286 16 *
ohair@286 17 * You should have received a copy of the GNU General Public License version
ohair@286 18 * 2 along with this work; if not, write to the Free Software Foundation,
ohair@286 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
ohair@286 20 *
ohair@286 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
ohair@286 22 * or visit www.oracle.com if you need additional information or have any
ohair@286 23 * questions.
ohair@286 24 */
ohair@286 25
ohair@286 26 package com.sun.xml.internal.ws.encoding;
ohair@286 27
ohair@286 28 import javax.xml.ws.WebServiceException;
ohair@286 29
ohair@286 30 /**
ohair@286 31 * This class tokenizes RFC822 and MIME headers into the basic
ohair@286 32 * symbols specified by RFC822 and MIME. <p>
ohair@286 33 *
ohair@286 34 * This class handles folded headers (ie headers with embedded
ohair@286 35 * CRLF SPACE sequences). The folds are removed in the returned
ohair@286 36 * tokens.
ohair@286 37 *
ohair@286 38 * @version 1.9, 02/03/27
ohair@286 39 * @author John Mani
ohair@286 40 */
ohair@286 41
ohair@286 42 class HeaderTokenizer {
ohair@286 43
ohair@286 44 /**
ohair@286 45 * The Token class represents tokens returned by the
ohair@286 46 * HeaderTokenizer.
ohair@286 47 */
ohair@286 48 static class Token {
ohair@286 49
ohair@286 50 private int type;
ohair@286 51 private String value;
ohair@286 52
ohair@286 53 /**
ohair@286 54 * Token type indicating an ATOM.
ohair@286 55 */
ohair@286 56 public static final int ATOM = -1;
ohair@286 57
ohair@286 58 /**
ohair@286 59 * Token type indicating a quoted string. The value
ohair@286 60 * field contains the string without the quotes.
ohair@286 61 */
ohair@286 62 public static final int QUOTEDSTRING = -2;
ohair@286 63
ohair@286 64 /**
ohair@286 65 * Token type indicating a comment. The value field
ohair@286 66 * contains the comment string without the comment
ohair@286 67 * start and end symbols.
ohair@286 68 */
ohair@286 69 public static final int COMMENT = -3;
ohair@286 70
ohair@286 71 /**
ohair@286 72 * Token type indicating end of input.
ohair@286 73 */
ohair@286 74 public static final int EOF = -4;
ohair@286 75
ohair@286 76 /**
ohair@286 77 * Constructor.
ohair@286 78 * @param type Token type
ohair@286 79 * @param value Token value
ohair@286 80 */
ohair@286 81 public Token(int type, String value) {
ohair@286 82 this.type = type;
ohair@286 83 this.value = value;
ohair@286 84 }
ohair@286 85
ohair@286 86 /**
ohair@286 87 * Return the type of the token. If the token represents a
ohair@286 88 * delimiter or a control character, the type is that character
ohair@286 89 * itself, converted to an integer. Otherwise, it's value is
ohair@286 90 * one of the following:
ohair@286 91 * <ul>
ohair@286 92 * <li><code>ATOM</code> A sequence of ASCII characters
ohair@286 93 * delimited by either SPACE, CTL, "(", <"> or the
ohair@286 94 * specified SPECIALS
ohair@286 95 * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
ohair@286 96 * within quotes
ohair@286 97 * <li><code>COMMENT</code> A sequence of ASCII characters
ohair@286 98 * within "(" and ")".
ohair@286 99 * <li><code>EOF</code> End of header
ohair@286 100 * </ul>
ohair@286 101 */
ohair@286 102 public int getType() {
ohair@286 103 return type;
ohair@286 104 }
ohair@286 105
ohair@286 106 /**
ohair@286 107 * Returns the value of the token just read. When the current
ohair@286 108 * token is a quoted string, this field contains the body of the
ohair@286 109 * string, without the quotes. When the current token is a comment,
ohair@286 110 * this field contains the body of the comment.
ohair@286 111 *
ohair@286 112 * @return token value
ohair@286 113 */
ohair@286 114 public String getValue() {
ohair@286 115 return value;
ohair@286 116 }
ohair@286 117 }
ohair@286 118
ohair@286 119 private String string; // the string to be tokenized
ohair@286 120 private boolean skipComments; // should comments be skipped ?
ohair@286 121 private String delimiters; // delimiter string
ohair@286 122 private int currentPos; // current parse position
ohair@286 123 private int maxPos; // string length
ohair@286 124 private int nextPos; // track start of next Token for next()
ohair@286 125 private int peekPos; // track start of next Token for peek()
ohair@286 126
ohair@286 127 /**
ohair@286 128 * RFC822 specials
ohair@286 129 */
ohair@286 130 private final static String RFC822 = "()<>@,;:\\\"\t .[]";
ohair@286 131
ohair@286 132 /**
ohair@286 133 * MIME specials
ohair@286 134 */
ohair@286 135 final static String MIME = "()<>@,;:\\\"\t []/?=";
ohair@286 136
ohair@286 137 // The EOF Token
ohair@286 138 private final static Token EOFToken = new Token(Token.EOF, null);
ohair@286 139
ohair@286 140 /**
ohair@286 141 * Constructor that takes a rfc822 style header.
ohair@286 142 *
ohair@286 143 * @param header The rfc822 header to be tokenized
ohair@286 144 * @param delimiters Set of delimiter characters
ohair@286 145 * to be used to delimit ATOMS. These
ohair@286 146 * are usually <code>RFC822</code> or
ohair@286 147 * <code>MIME</code>
ohair@286 148 * @param skipComments If true, comments are skipped and
ohair@286 149 * not returned as tokens
ohair@286 150 */
ohair@286 151 HeaderTokenizer(String header, String delimiters,
ohair@286 152 boolean skipComments) {
ohair@286 153 string = (header == null) ? "" : header; // paranoia ?!
ohair@286 154 this.skipComments = skipComments;
ohair@286 155 this.delimiters = delimiters;
ohair@286 156 currentPos = nextPos = peekPos = 0;
ohair@286 157 maxPos = string.length();
ohair@286 158 }
ohair@286 159
ohair@286 160 /**
ohair@286 161 * Constructor. Comments are ignored and not returned as tokens
ohair@286 162 *
ohair@286 163 * @param header The header that is tokenized
ohair@286 164 * @param delimiters The delimiters to be used
ohair@286 165 */
ohair@286 166 HeaderTokenizer(String header, String delimiters) {
ohair@286 167 this(header, delimiters, true);
ohair@286 168 }
ohair@286 169
ohair@286 170 /**
ohair@286 171 * Constructor. The RFC822 defined delimiters - RFC822 - are
ohair@286 172 * used to delimit ATOMS. Also comments are skipped and not
ohair@286 173 * returned as tokens
ohair@286 174 */
ohair@286 175 HeaderTokenizer(String header) {
ohair@286 176 this(header, RFC822);
ohair@286 177 }
ohair@286 178
ohair@286 179 /**
ohair@286 180 * Parses the next token from this String. <p>
ohair@286 181 *
ohair@286 182 * Clients sit in a loop calling next() to parse successive
ohair@286 183 * tokens until an EOF Token is returned.
ohair@286 184 *
ohair@286 185 * @return the next Token
ohair@286 186 * @exception WebServiceException if the parse fails
ohair@286 187 */
ohair@286 188 Token next() throws WebServiceException {
ohair@286 189 Token tk;
ohair@286 190
ohair@286 191 currentPos = nextPos; // setup currentPos
ohair@286 192 tk = getNext();
ohair@286 193 nextPos = peekPos = currentPos; // update currentPos and peekPos
ohair@286 194 return tk;
ohair@286 195 }
ohair@286 196
ohair@286 197 /**
ohair@286 198 * Peek at the next token, without actually removing the token
ohair@286 199 * from the parse stream. Invoking this method multiple times
ohair@286 200 * will return successive tokens, until <code>next()</code> is
ohair@286 201 * called. <p>
ohair@286 202 *
ohair@286 203 * @return the next Token
ohair@286 204 * @exception WebServiceException if the parse fails
ohair@286 205 */
ohair@286 206 Token peek() throws WebServiceException {
ohair@286 207 Token tk;
ohair@286 208
ohair@286 209 currentPos = peekPos; // setup currentPos
ohair@286 210 tk = getNext();
ohair@286 211 peekPos = currentPos; // update peekPos
ohair@286 212 return tk;
ohair@286 213 }
ohair@286 214
ohair@286 215 /**
ohair@286 216 * Return the rest of the Header.
ohair@286 217 *
ohair@286 218 * @return String rest of header. null is returned if we are
ohair@286 219 * already at end of header
ohair@286 220 */
ohair@286 221 String getRemainder() {
ohair@286 222 return string.substring(nextPos);
ohair@286 223 }
ohair@286 224
ohair@286 225 /*
ohair@286 226 * Return the next token starting from 'currentPos'. After the
ohair@286 227 * parse, 'currentPos' is updated to point to the start of the
ohair@286 228 * next token.
ohair@286 229 */
ohair@286 230 private Token getNext() throws WebServiceException {
ohair@286 231 // If we're already at end of string, return EOF
ohair@286 232 if (currentPos >= maxPos)
ohair@286 233 return EOFToken;
ohair@286 234
ohair@286 235 // Skip white-space, position currentPos beyond the space
ohair@286 236 if (skipWhiteSpace() == Token.EOF)
ohair@286 237 return EOFToken;
ohair@286 238
ohair@286 239 char c;
ohair@286 240 int start;
ohair@286 241 boolean filter = false;
ohair@286 242
ohair@286 243 c = string.charAt(currentPos);
ohair@286 244
ohair@286 245 // Check or Skip comments and position currentPos
ohair@286 246 // beyond the comment
ohair@286 247 while (c == '(') {
ohair@286 248 // Parsing comment ..
ohair@286 249 int nesting;
ohair@286 250 for (start = ++currentPos, nesting = 1;
ohair@286 251 nesting > 0 && currentPos < maxPos;
ohair@286 252 currentPos++) {
ohair@286 253 c = string.charAt(currentPos);
ohair@286 254 if (c == '\\') { // Escape sequence
ohair@286 255 currentPos++; // skip the escaped character
ohair@286 256 filter = true;
ohair@286 257 } else if (c == '\r')
ohair@286 258 filter = true;
ohair@286 259 else if (c == '(')
ohair@286 260 nesting++;
ohair@286 261 else if (c == ')')
ohair@286 262 nesting--;
ohair@286 263 }
ohair@286 264 if (nesting != 0)
ohair@286 265 throw new WebServiceException("Unbalanced comments");
ohair@286 266
ohair@286 267 if (!skipComments) {
ohair@286 268 // Return the comment, if we are asked to.
ohair@286 269 // Note that the comment start & end markers are ignored.
ohair@286 270 String s;
ohair@286 271 if (filter) // need to go thru the token again.
ohair@286 272 s = filterToken(string, start, currentPos-1);
ohair@286 273 else
ohair@286 274 s = string.substring(start,currentPos-1);
ohair@286 275
ohair@286 276 return new Token(Token.COMMENT, s);
ohair@286 277 }
ohair@286 278
ohair@286 279 // Skip any whitespace after the comment.
ohair@286 280 if (skipWhiteSpace() == Token.EOF)
ohair@286 281 return EOFToken;
ohair@286 282 c = string.charAt(currentPos);
ohair@286 283 }
ohair@286 284
ohair@286 285 // Check for quoted-string and position currentPos
ohair@286 286 // beyond the terminating quote
ohair@286 287 if (c == '"') {
ohair@286 288 for (start = ++currentPos; currentPos < maxPos; currentPos++) {
ohair@286 289 c = string.charAt(currentPos);
ohair@286 290 if (c == '\\') { // Escape sequence
ohair@286 291 currentPos++;
ohair@286 292 filter = true;
ohair@286 293 } else if (c == '\r')
ohair@286 294 filter = true;
ohair@286 295 else if (c == '"') {
ohair@286 296 currentPos++;
ohair@286 297 String s;
ohair@286 298
ohair@286 299 if (filter)
ohair@286 300 s = filterToken(string, start, currentPos-1);
ohair@286 301 else
ohair@286 302 s = string.substring(start,currentPos-1);
ohair@286 303
ohair@286 304 return new Token(Token.QUOTEDSTRING, s);
ohair@286 305 }
ohair@286 306 }
ohair@286 307 throw new WebServiceException("Unbalanced quoted string");
ohair@286 308 }
ohair@286 309
ohair@286 310 // Check for SPECIAL or CTL
ohair@286 311 if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
ohair@286 312 currentPos++; // re-position currentPos
ohair@286 313 char ch[] = new char[1];
ohair@286 314 ch[0] = c;
ohair@286 315 return new Token((int)c, new String(ch));
ohair@286 316 }
ohair@286 317
ohair@286 318 // Check for ATOM
ohair@286 319 for (start = currentPos; currentPos < maxPos; currentPos++) {
ohair@286 320 c = string.charAt(currentPos);
ohair@286 321 // ATOM is delimited by either SPACE, CTL, "(", <">
ohair@286 322 // or the specified SPECIALS
ohair@286 323 if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
ohair@286 324 c == '"' || delimiters.indexOf(c) >= 0)
ohair@286 325 break;
ohair@286 326 }
ohair@286 327 return new Token(Token.ATOM, string.substring(start, currentPos));
ohair@286 328 }
ohair@286 329
ohair@286 330 // Skip SPACE, HT, CR and NL
ohair@286 331 private int skipWhiteSpace() {
ohair@286 332 char c;
ohair@286 333 for (; currentPos < maxPos; currentPos++)
ohair@286 334 if (((c = string.charAt(currentPos)) != ' ') &&
ohair@286 335 (c != '\t') && (c != '\r') && (c != '\n'))
ohair@286 336 return currentPos;
ohair@286 337 return Token.EOF;
ohair@286 338 }
ohair@286 339
ohair@286 340 /* Process escape sequences and embedded LWSPs from a comment or
ohair@286 341 * quoted string.
ohair@286 342 */
ohair@286 343 private static String filterToken(String s, int start, int end) {
ohair@286 344 StringBuffer sb = new StringBuffer();
ohair@286 345 char c;
ohair@286 346 boolean gotEscape = false;
ohair@286 347 boolean gotCR = false;
ohair@286 348
ohair@286 349 for (int i = start; i < end; i++) {
ohair@286 350 c = s.charAt(i);
ohair@286 351 if (c == '\n' && gotCR) {
ohair@286 352 // This LF is part of an unescaped
ohair@286 353 // CRLF sequence (i.e, LWSP). Skip it.
ohair@286 354 gotCR = false;
ohair@286 355 continue;
ohair@286 356 }
ohair@286 357
ohair@286 358 gotCR = false;
ohair@286 359 if (!gotEscape) {
ohair@286 360 // Previous character was NOT '\'
ohair@286 361 if (c == '\\') // skip this character
ohair@286 362 gotEscape = true;
ohair@286 363 else if (c == '\r') // skip this character
ohair@286 364 gotCR = true;
ohair@286 365 else // append this character
ohair@286 366 sb.append(c);
ohair@286 367 } else {
ohair@286 368 // Previous character was '\'. So no need to
ohair@286 369 // bother with any special processing, just
ohair@286 370 // append this character
ohair@286 371 sb.append(c);
ohair@286 372 gotEscape = false;
ohair@286 373 }
ohair@286 374 }
ohair@286 375 return sb.toString();
ohair@286 376 }
ohair@286 377 }

mercurial