src/share/jaxws_classes/com/sun/xml/internal/ws/encoding/HeaderTokenizer.java

Thu, 31 Aug 2017 15:18:52 +0800

author
aoqi
date
Thu, 31 Aug 2017 15:18:52 +0800
changeset 637
9c07ef4934dd
parent 368
0989ad8c0860
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.xml.internal.ws.encoding;
aoqi@0 27
aoqi@0 28 import javax.xml.ws.WebServiceException;
aoqi@0 29
aoqi@0 30 /**
aoqi@0 31 * This class tokenizes RFC822 and MIME headers into the basic
aoqi@0 32 * symbols specified by RFC822 and MIME. <p>
aoqi@0 33 *
aoqi@0 34 * This class handles folded headers (i.e., headers with embedded
aoqi@0 35 * CRLF SPACE sequences). The folds are removed in the returned
aoqi@0 36 * tokens.
aoqi@0 37 *
aoqi@0 38 * @version 1.9, 02/03/27
aoqi@0 39 * @author John Mani
aoqi@0 40 */
aoqi@0 41
aoqi@0 42 class HeaderTokenizer {
aoqi@0 43
aoqi@0 44 /**
aoqi@0 45 * The Token class represents tokens returned by the
aoqi@0 46 * HeaderTokenizer.
aoqi@0 47 */
aoqi@0 48 static class Token {
aoqi@0 49
aoqi@0 50 private int type;
aoqi@0 51 private String value;
aoqi@0 52
aoqi@0 53 /**
aoqi@0 54 * Token type indicating an ATOM.
aoqi@0 55 */
aoqi@0 56 public static final int ATOM = -1;
aoqi@0 57
aoqi@0 58 /**
aoqi@0 59 * Token type indicating a quoted string. The value
aoqi@0 60 * field contains the string without the quotes.
aoqi@0 61 */
aoqi@0 62 public static final int QUOTEDSTRING = -2;
aoqi@0 63
aoqi@0 64 /**
aoqi@0 65 * Token type indicating a comment. The value field
aoqi@0 66 * contains the comment string without the comment
aoqi@0 67 * start and end symbols.
aoqi@0 68 */
aoqi@0 69 public static final int COMMENT = -3;
aoqi@0 70
aoqi@0 71 /**
aoqi@0 72 * Token type indicating end of input.
aoqi@0 73 */
aoqi@0 74 public static final int EOF = -4;
aoqi@0 75
aoqi@0 76 /**
aoqi@0 77 * Constructor.
aoqi@0 78 * @param type Token type
aoqi@0 79 * @param value Token value
aoqi@0 80 */
aoqi@0 81 public Token(int type, String value) {
aoqi@0 82 this.type = type;
aoqi@0 83 this.value = value;
aoqi@0 84 }
aoqi@0 85
aoqi@0 86 /**
aoqi@0 87 * Return the type of the token. If the token represents a
aoqi@0 88 * delimiter or a control character, the type is that character
aoqi@0 89 * itself, converted to an integer. Otherwise, it's value is
aoqi@0 90 * one of the following:
aoqi@0 91 * <ul>
aoqi@0 92 * <li><code>ATOM</code> A sequence of ASCII characters
aoqi@0 93 * delimited by either SPACE, CTL, "(", <"> or the
aoqi@0 94 * specified SPECIALS
aoqi@0 95 * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
aoqi@0 96 * within quotes
aoqi@0 97 * <li><code>COMMENT</code> A sequence of ASCII characters
aoqi@0 98 * within "(" and ")".
aoqi@0 99 * <li><code>EOF</code> End of header
aoqi@0 100 * </ul>
aoqi@0 101 */
aoqi@0 102 public int getType() {
aoqi@0 103 return type;
aoqi@0 104 }
aoqi@0 105
aoqi@0 106 /**
aoqi@0 107 * Returns the value of the token just read. When the current
aoqi@0 108 * token is a quoted string, this field contains the body of the
aoqi@0 109 * string, without the quotes. When the current token is a comment,
aoqi@0 110 * this field contains the body of the comment.
aoqi@0 111 *
aoqi@0 112 * @return token value
aoqi@0 113 */
aoqi@0 114 public String getValue() {
aoqi@0 115 return value;
aoqi@0 116 }
aoqi@0 117 }
aoqi@0 118
aoqi@0 119 private String string; // the string to be tokenized
aoqi@0 120 private boolean skipComments; // should comments be skipped ?
aoqi@0 121 private String delimiters; // delimiter string
aoqi@0 122 private int currentPos; // current parse position
aoqi@0 123 private int maxPos; // string length
aoqi@0 124 private int nextPos; // track start of next Token for next()
aoqi@0 125 private int peekPos; // track start of next Token for peek()
aoqi@0 126
aoqi@0 127 /**
aoqi@0 128 * RFC822 specials
aoqi@0 129 */
aoqi@0 130 private final static String RFC822 = "()<>@,;:\\\"\t .[]";
aoqi@0 131
aoqi@0 132 /**
aoqi@0 133 * MIME specials
aoqi@0 134 */
aoqi@0 135 final static String MIME = "()<>@,;:\\\"\t []/?=";
aoqi@0 136
aoqi@0 137 // The EOF Token
aoqi@0 138 private final static Token EOFToken = new Token(Token.EOF, null);
aoqi@0 139
aoqi@0 140 /**
aoqi@0 141 * Constructor that takes a rfc822 style header.
aoqi@0 142 *
aoqi@0 143 * @param header The rfc822 header to be tokenized
aoqi@0 144 * @param delimiters Set of delimiter characters
aoqi@0 145 * to be used to delimit ATOMS. These
aoqi@0 146 * are usually <code>RFC822</code> or
aoqi@0 147 * <code>MIME</code>
aoqi@0 148 * @param skipComments If true, comments are skipped and
aoqi@0 149 * not returned as tokens
aoqi@0 150 */
aoqi@0 151 HeaderTokenizer(String header, String delimiters,
aoqi@0 152 boolean skipComments) {
aoqi@0 153 string = (header == null) ? "" : header; // paranoia ?!
aoqi@0 154 this.skipComments = skipComments;
aoqi@0 155 this.delimiters = delimiters;
aoqi@0 156 currentPos = nextPos = peekPos = 0;
aoqi@0 157 maxPos = string.length();
aoqi@0 158 }
aoqi@0 159
aoqi@0 160 /**
aoqi@0 161 * Constructor. Comments are ignored and not returned as tokens
aoqi@0 162 *
aoqi@0 163 * @param header The header that is tokenized
aoqi@0 164 * @param delimiters The delimiters to be used
aoqi@0 165 */
aoqi@0 166 HeaderTokenizer(String header, String delimiters) {
aoqi@0 167 this(header, delimiters, true);
aoqi@0 168 }
aoqi@0 169
aoqi@0 170 /**
aoqi@0 171 * Constructor. The RFC822 defined delimiters - RFC822 - are
aoqi@0 172 * used to delimit ATOMS. Also comments are skipped and not
aoqi@0 173 * returned as tokens
aoqi@0 174 */
aoqi@0 175 HeaderTokenizer(String header) {
aoqi@0 176 this(header, RFC822);
aoqi@0 177 }
aoqi@0 178
aoqi@0 179 /**
aoqi@0 180 * Parses the next token from this String. <p>
aoqi@0 181 *
aoqi@0 182 * Clients sit in a loop calling next() to parse successive
aoqi@0 183 * tokens until an EOF Token is returned.
aoqi@0 184 *
aoqi@0 185 * @return the next Token
aoqi@0 186 * @exception WebServiceException if the parse fails
aoqi@0 187 */
aoqi@0 188 Token next() throws WebServiceException {
aoqi@0 189 Token tk;
aoqi@0 190
aoqi@0 191 currentPos = nextPos; // setup currentPos
aoqi@0 192 tk = getNext();
aoqi@0 193 nextPos = peekPos = currentPos; // update currentPos and peekPos
aoqi@0 194 return tk;
aoqi@0 195 }
aoqi@0 196
aoqi@0 197 /**
aoqi@0 198 * Peek at the next token, without actually removing the token
aoqi@0 199 * from the parse stream. Invoking this method multiple times
aoqi@0 200 * will return successive tokens, until <code>next()</code> is
aoqi@0 201 * called. <p>
aoqi@0 202 *
aoqi@0 203 * @return the next Token
aoqi@0 204 * @exception WebServiceException if the parse fails
aoqi@0 205 */
aoqi@0 206 Token peek() throws WebServiceException {
aoqi@0 207 Token tk;
aoqi@0 208
aoqi@0 209 currentPos = peekPos; // setup currentPos
aoqi@0 210 tk = getNext();
aoqi@0 211 peekPos = currentPos; // update peekPos
aoqi@0 212 return tk;
aoqi@0 213 }
aoqi@0 214
aoqi@0 215 /**
aoqi@0 216 * Return the rest of the Header.
aoqi@0 217 *
aoqi@0 218 * @return String rest of header. null is returned if we are
aoqi@0 219 * already at end of header
aoqi@0 220 */
aoqi@0 221 String getRemainder() {
aoqi@0 222 return string.substring(nextPos);
aoqi@0 223 }
aoqi@0 224
aoqi@0 225 /*
aoqi@0 226 * Return the next token starting from 'currentPos'. After the
aoqi@0 227 * parse, 'currentPos' is updated to point to the start of the
aoqi@0 228 * next token.
aoqi@0 229 */
aoqi@0 230 private Token getNext() throws WebServiceException {
aoqi@0 231 // If we're already at end of string, return EOF
aoqi@0 232 if (currentPos >= maxPos)
aoqi@0 233 return EOFToken;
aoqi@0 234
aoqi@0 235 // Skip white-space, position currentPos beyond the space
aoqi@0 236 if (skipWhiteSpace() == Token.EOF)
aoqi@0 237 return EOFToken;
aoqi@0 238
aoqi@0 239 char c;
aoqi@0 240 int start;
aoqi@0 241 boolean filter = false;
aoqi@0 242
aoqi@0 243 c = string.charAt(currentPos);
aoqi@0 244
aoqi@0 245 // Check or Skip comments and position currentPos
aoqi@0 246 // beyond the comment
aoqi@0 247 while (c == '(') {
aoqi@0 248 // Parsing comment ..
aoqi@0 249 int nesting;
aoqi@0 250 for (start = ++currentPos, nesting = 1;
aoqi@0 251 nesting > 0 && currentPos < maxPos;
aoqi@0 252 currentPos++) {
aoqi@0 253 c = string.charAt(currentPos);
aoqi@0 254 if (c == '\\') { // Escape sequence
aoqi@0 255 currentPos++; // skip the escaped character
aoqi@0 256 filter = true;
aoqi@0 257 } else if (c == '\r')
aoqi@0 258 filter = true;
aoqi@0 259 else if (c == '(')
aoqi@0 260 nesting++;
aoqi@0 261 else if (c == ')')
aoqi@0 262 nesting--;
aoqi@0 263 }
aoqi@0 264 if (nesting != 0)
aoqi@0 265 throw new WebServiceException("Unbalanced comments");
aoqi@0 266
aoqi@0 267 if (!skipComments) {
aoqi@0 268 // Return the comment, if we are asked to.
aoqi@0 269 // Note that the comment start & end markers are ignored.
aoqi@0 270 String s;
aoqi@0 271 if (filter) // need to go thru the token again.
aoqi@0 272 s = filterToken(string, start, currentPos-1);
aoqi@0 273 else
aoqi@0 274 s = string.substring(start,currentPos-1);
aoqi@0 275
aoqi@0 276 return new Token(Token.COMMENT, s);
aoqi@0 277 }
aoqi@0 278
aoqi@0 279 // Skip any whitespace after the comment.
aoqi@0 280 if (skipWhiteSpace() == Token.EOF)
aoqi@0 281 return EOFToken;
aoqi@0 282 c = string.charAt(currentPos);
aoqi@0 283 }
aoqi@0 284
aoqi@0 285 // Check for quoted-string and position currentPos
aoqi@0 286 // beyond the terminating quote
aoqi@0 287 if (c == '"') {
aoqi@0 288 for (start = ++currentPos; currentPos < maxPos; currentPos++) {
aoqi@0 289 c = string.charAt(currentPos);
aoqi@0 290 if (c == '\\') { // Escape sequence
aoqi@0 291 currentPos++;
aoqi@0 292 filter = true;
aoqi@0 293 } else if (c == '\r')
aoqi@0 294 filter = true;
aoqi@0 295 else if (c == '"') {
aoqi@0 296 currentPos++;
aoqi@0 297 String s;
aoqi@0 298
aoqi@0 299 if (filter)
aoqi@0 300 s = filterToken(string, start, currentPos-1);
aoqi@0 301 else
aoqi@0 302 s = string.substring(start,currentPos-1);
aoqi@0 303
aoqi@0 304 return new Token(Token.QUOTEDSTRING, s);
aoqi@0 305 }
aoqi@0 306 }
aoqi@0 307 throw new WebServiceException("Unbalanced quoted string");
aoqi@0 308 }
aoqi@0 309
aoqi@0 310 // Check for SPECIAL or CTL
aoqi@0 311 if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
aoqi@0 312 currentPos++; // re-position currentPos
aoqi@0 313 char ch[] = new char[1];
aoqi@0 314 ch[0] = c;
aoqi@0 315 return new Token((int)c, new String(ch));
aoqi@0 316 }
aoqi@0 317
aoqi@0 318 // Check for ATOM
aoqi@0 319 for (start = currentPos; currentPos < maxPos; currentPos++) {
aoqi@0 320 c = string.charAt(currentPos);
aoqi@0 321 // ATOM is delimited by either SPACE, CTL, "(", <">
aoqi@0 322 // or the specified SPECIALS
aoqi@0 323 if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
aoqi@0 324 c == '"' || delimiters.indexOf(c) >= 0)
aoqi@0 325 break;
aoqi@0 326 }
aoqi@0 327 return new Token(Token.ATOM, string.substring(start, currentPos));
aoqi@0 328 }
aoqi@0 329
aoqi@0 330 // Skip SPACE, HT, CR and NL
aoqi@0 331 private int skipWhiteSpace() {
aoqi@0 332 char c;
aoqi@0 333 for (; currentPos < maxPos; currentPos++)
aoqi@0 334 if (((c = string.charAt(currentPos)) != ' ') &&
aoqi@0 335 (c != '\t') && (c != '\r') && (c != '\n'))
aoqi@0 336 return currentPos;
aoqi@0 337 return Token.EOF;
aoqi@0 338 }
aoqi@0 339
aoqi@0 340 /* Process escape sequences and embedded LWSPs from a comment or
aoqi@0 341 * quoted string.
aoqi@0 342 */
aoqi@0 343 private static String filterToken(String s, int start, int end) {
aoqi@0 344 StringBuffer sb = new StringBuffer();
aoqi@0 345 char c;
aoqi@0 346 boolean gotEscape = false;
aoqi@0 347 boolean gotCR = false;
aoqi@0 348
aoqi@0 349 for (int i = start; i < end; i++) {
aoqi@0 350 c = s.charAt(i);
aoqi@0 351 if (c == '\n' && gotCR) {
aoqi@0 352 // This LF is part of an unescaped
aoqi@0 353 // CRLF sequence (i.e, LWSP). Skip it.
aoqi@0 354 gotCR = false;
aoqi@0 355 continue;
aoqi@0 356 }
aoqi@0 357
aoqi@0 358 gotCR = false;
aoqi@0 359 if (!gotEscape) {
aoqi@0 360 // Previous character was NOT '\'
aoqi@0 361 if (c == '\\') // skip this character
aoqi@0 362 gotEscape = true;
aoqi@0 363 else if (c == '\r') // skip this character
aoqi@0 364 gotCR = true;
aoqi@0 365 else // append this character
aoqi@0 366 sb.append(c);
aoqi@0 367 } else {
aoqi@0 368 // Previous character was '\'. So no need to
aoqi@0 369 // bother with any special processing, just
aoqi@0 370 // append this character
aoqi@0 371 sb.append(c);
aoqi@0 372 gotEscape = false;
aoqi@0 373 }
aoqi@0 374 }
aoqi@0 375 return sb.toString();
aoqi@0 376 }
aoqi@0 377 }

mercurial