Wed, 27 Apr 2016 01:27:09 +0800
Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/jaxws/
changeset: 657:d47a47f961ee
tag: jdk8u25-b17
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.xml.internal.ws.encoding;
28 import javax.xml.ws.WebServiceException;
30 /**
31 * This class tokenizes RFC822 and MIME headers into the basic
32 * symbols specified by RFC822 and MIME. <p>
33 *
34 * This class handles folded headers (ie headers with embedded
35 * CRLF SPACE sequences). The folds are removed in the returned
36 * tokens.
37 *
38 * @version 1.9, 02/03/27
39 * @author John Mani
40 */
42 class HeaderTokenizer {
44 /**
45 * The Token class represents tokens returned by the
46 * HeaderTokenizer.
47 */
48 static class Token {
50 private int type;
51 private String value;
53 /**
54 * Token type indicating an ATOM.
55 */
56 public static final int ATOM = -1;
58 /**
59 * Token type indicating a quoted string. The value
60 * field contains the string without the quotes.
61 */
62 public static final int QUOTEDSTRING = -2;
64 /**
65 * Token type indicating a comment. The value field
66 * contains the comment string without the comment
67 * start and end symbols.
68 */
69 public static final int COMMENT = -3;
71 /**
72 * Token type indicating end of input.
73 */
74 public static final int EOF = -4;
76 /**
77 * Constructor.
78 * @param type Token type
79 * @param value Token value
80 */
81 public Token(int type, String value) {
82 this.type = type;
83 this.value = value;
84 }
86 /**
87 * Return the type of the token. If the token represents a
88 * delimiter or a control character, the type is that character
89 * itself, converted to an integer. Otherwise, it's value is
90 * one of the following:
91 * <ul>
92 * <li><code>ATOM</code> A sequence of ASCII characters
93 * delimited by either SPACE, CTL, "(", <"> or the
94 * specified SPECIALS
95 * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
96 * within quotes
97 * <li><code>COMMENT</code> A sequence of ASCII characters
98 * within "(" and ")".
99 * <li><code>EOF</code> End of header
100 * </ul>
101 */
102 public int getType() {
103 return type;
104 }
106 /**
107 * Returns the value of the token just read. When the current
108 * token is a quoted string, this field contains the body of the
109 * string, without the quotes. When the current token is a comment,
110 * this field contains the body of the comment.
111 *
112 * @return token value
113 */
114 public String getValue() {
115 return value;
116 }
117 }
119 private String string; // the string to be tokenized
120 private boolean skipComments; // should comments be skipped ?
121 private String delimiters; // delimiter string
122 private int currentPos; // current parse position
123 private int maxPos; // string length
124 private int nextPos; // track start of next Token for next()
125 private int peekPos; // track start of next Token for peek()
127 /**
128 * RFC822 specials
129 */
130 private final static String RFC822 = "()<>@,;:\\\"\t .[]";
132 /**
133 * MIME specials
134 */
135 final static String MIME = "()<>@,;:\\\"\t []/?=";
137 // The EOF Token
138 private final static Token EOFToken = new Token(Token.EOF, null);
140 /**
141 * Constructor that takes a rfc822 style header.
142 *
143 * @param header The rfc822 header to be tokenized
144 * @param delimiters Set of delimiter characters
145 * to be used to delimit ATOMS. These
146 * are usually <code>RFC822</code> or
147 * <code>MIME</code>
148 * @param skipComments If true, comments are skipped and
149 * not returned as tokens
150 */
151 HeaderTokenizer(String header, String delimiters,
152 boolean skipComments) {
153 string = (header == null) ? "" : header; // paranoia ?!
154 this.skipComments = skipComments;
155 this.delimiters = delimiters;
156 currentPos = nextPos = peekPos = 0;
157 maxPos = string.length();
158 }
160 /**
161 * Constructor. Comments are ignored and not returned as tokens
162 *
163 * @param header The header that is tokenized
164 * @param delimiters The delimiters to be used
165 */
166 HeaderTokenizer(String header, String delimiters) {
167 this(header, delimiters, true);
168 }
170 /**
171 * Constructor. The RFC822 defined delimiters - RFC822 - are
172 * used to delimit ATOMS. Also comments are skipped and not
173 * returned as tokens
174 */
175 HeaderTokenizer(String header) {
176 this(header, RFC822);
177 }
179 /**
180 * Parses the next token from this String. <p>
181 *
182 * Clients sit in a loop calling next() to parse successive
183 * tokens until an EOF Token is returned.
184 *
185 * @return the next Token
186 * @exception WebServiceException if the parse fails
187 */
188 Token next() throws WebServiceException {
189 Token tk;
191 currentPos = nextPos; // setup currentPos
192 tk = getNext();
193 nextPos = peekPos = currentPos; // update currentPos and peekPos
194 return tk;
195 }
197 /**
198 * Peek at the next token, without actually removing the token
199 * from the parse stream. Invoking this method multiple times
200 * will return successive tokens, until <code>next()</code> is
201 * called. <p>
202 *
203 * @return the next Token
204 * @exception WebServiceException if the parse fails
205 */
206 Token peek() throws WebServiceException {
207 Token tk;
209 currentPos = peekPos; // setup currentPos
210 tk = getNext();
211 peekPos = currentPos; // update peekPos
212 return tk;
213 }
215 /**
216 * Return the rest of the Header.
217 *
218 * @return String rest of header. null is returned if we are
219 * already at end of header
220 */
221 String getRemainder() {
222 return string.substring(nextPos);
223 }
225 /*
226 * Return the next token starting from 'currentPos'. After the
227 * parse, 'currentPos' is updated to point to the start of the
228 * next token.
229 */
230 private Token getNext() throws WebServiceException {
231 // If we're already at end of string, return EOF
232 if (currentPos >= maxPos)
233 return EOFToken;
235 // Skip white-space, position currentPos beyond the space
236 if (skipWhiteSpace() == Token.EOF)
237 return EOFToken;
239 char c;
240 int start;
241 boolean filter = false;
243 c = string.charAt(currentPos);
245 // Check or Skip comments and position currentPos
246 // beyond the comment
247 while (c == '(') {
248 // Parsing comment ..
249 int nesting;
250 for (start = ++currentPos, nesting = 1;
251 nesting > 0 && currentPos < maxPos;
252 currentPos++) {
253 c = string.charAt(currentPos);
254 if (c == '\\') { // Escape sequence
255 currentPos++; // skip the escaped character
256 filter = true;
257 } else if (c == '\r')
258 filter = true;
259 else if (c == '(')
260 nesting++;
261 else if (c == ')')
262 nesting--;
263 }
264 if (nesting != 0)
265 throw new WebServiceException("Unbalanced comments");
267 if (!skipComments) {
268 // Return the comment, if we are asked to.
269 // Note that the comment start & end markers are ignored.
270 String s;
271 if (filter) // need to go thru the token again.
272 s = filterToken(string, start, currentPos-1);
273 else
274 s = string.substring(start,currentPos-1);
276 return new Token(Token.COMMENT, s);
277 }
279 // Skip any whitespace after the comment.
280 if (skipWhiteSpace() == Token.EOF)
281 return EOFToken;
282 c = string.charAt(currentPos);
283 }
285 // Check for quoted-string and position currentPos
286 // beyond the terminating quote
287 if (c == '"') {
288 for (start = ++currentPos; currentPos < maxPos; currentPos++) {
289 c = string.charAt(currentPos);
290 if (c == '\\') { // Escape sequence
291 currentPos++;
292 filter = true;
293 } else if (c == '\r')
294 filter = true;
295 else if (c == '"') {
296 currentPos++;
297 String s;
299 if (filter)
300 s = filterToken(string, start, currentPos-1);
301 else
302 s = string.substring(start,currentPos-1);
304 return new Token(Token.QUOTEDSTRING, s);
305 }
306 }
307 throw new WebServiceException("Unbalanced quoted string");
308 }
310 // Check for SPECIAL or CTL
311 if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
312 currentPos++; // re-position currentPos
313 char ch[] = new char[1];
314 ch[0] = c;
315 return new Token((int)c, new String(ch));
316 }
318 // Check for ATOM
319 for (start = currentPos; currentPos < maxPos; currentPos++) {
320 c = string.charAt(currentPos);
321 // ATOM is delimited by either SPACE, CTL, "(", <">
322 // or the specified SPECIALS
323 if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
324 c == '"' || delimiters.indexOf(c) >= 0)
325 break;
326 }
327 return new Token(Token.ATOM, string.substring(start, currentPos));
328 }
330 // Skip SPACE, HT, CR and NL
331 private int skipWhiteSpace() {
332 char c;
333 for (; currentPos < maxPos; currentPos++)
334 if (((c = string.charAt(currentPos)) != ' ') &&
335 (c != '\t') && (c != '\r') && (c != '\n'))
336 return currentPos;
337 return Token.EOF;
338 }
340 /* Process escape sequences and embedded LWSPs from a comment or
341 * quoted string.
342 */
343 private static String filterToken(String s, int start, int end) {
344 StringBuffer sb = new StringBuffer();
345 char c;
346 boolean gotEscape = false;
347 boolean gotCR = false;
349 for (int i = start; i < end; i++) {
350 c = s.charAt(i);
351 if (c == '\n' && gotCR) {
352 // This LF is part of an unescaped
353 // CRLF sequence (i.e, LWSP). Skip it.
354 gotCR = false;
355 continue;
356 }
358 gotCR = false;
359 if (!gotEscape) {
360 // Previous character was NOT '\'
361 if (c == '\\') // skip this character
362 gotEscape = true;
363 else if (c == '\r') // skip this character
364 gotCR = true;
365 else // append this character
366 sb.append(c);
367 } else {
368 // Previous character was '\'. So no need to
369 // bother with any special processing, just
370 // append this character
371 sb.append(c);
372 gotEscape = false;
373 }
374 }
375 return sb.toString();
376 }
377 }