Mon, 04 May 2009 21:10:41 -0700
6658158: Mutable statics in SAAJ (findbugs)
6658163: txw2.DatatypeWriter.BUILDIN is a mutable static (findbugs)
Reviewed-by: darcy
1 /*
2 * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
26 package com.sun.codemodel.internal.util;
28 import java.nio.CharBuffer;
29 import java.nio.charset.CoderResult;
/**
 * Utility class for dealing with UTF-16 surrogates.
 *
 * <p>Offers static predicates and conversions between UCS-4 values and
 * UTF-16 surrogate pairs, plus two small stateful helpers: {@link Parser}
 * (UTF-16 to UCS-4) and {@link Generator} (UCS-4 to UTF-16).
 *
 * @author Mark Reinhold
 * @version 1.11, 03/01/23
 */
class Surrogate {

    private Surrogate() { }

    // UTF-16 surrogate-character ranges
    //
    public static final char MIN_HIGH = '\uD800';
    public static final char MAX_HIGH = '\uDBFF';
    public static final char MIN_LOW = '\uDC00';
    public static final char MAX_LOW = '\uDFFF';
    public static final char MIN = MIN_HIGH;
    public static final char MAX = MAX_LOW;

    // Range of UCS-4 values that need surrogates in UTF-16
    //
    public static final int UCS4_MIN = 0x10000;
    public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

    /**
     * Tells whether or not the given UTF-16 value is a high surrogate.
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given UTF-16 value is a low surrogate.
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given UTF-16 value is a surrogate character.
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16.
     */
    public static boolean neededFor(int uc) {
        return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
    }

    /**
     * Returns the high UTF-16 surrogate for the given UCS-4 character.
     * The argument is not range-checked; only the ten bits that belong in
     * the high surrogate are used.
     */
    public static char high(int uc) {
        return (char) (MIN_HIGH | (((uc - UCS4_MIN) >> 10) & 0x3ff));
    }

    /**
     * Returns the low UTF-16 surrogate for the given UCS-4 character.
     * The argument is not range-checked; only the low ten bits are used.
     */
    public static char low(int uc) {
        return (char) (MIN_LOW | ((uc - UCS4_MIN) & 0x3ff));
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     * The arguments are not validated; only their low ten bits are used.
     *
     * @param  c  The high surrogate
     * @param  d  The low surrogate
     */
    public static int toUCS4(char c, char d) {
        return (((c & 0x3ff) << 10) | (d & 0x3ff)) + UCS4_MIN;
    }

    /**
     * Surrogate parsing support.  Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p>An instance keeps the outcome of the most recent parse in plain
     * fields and performs no synchronization.
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.  After a successful parse this returns
         * <tt>null</tt>.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            return CoderResult.unmappableForLength(isPair ? 2 : 1);
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (isHigh(c)) {
                if (!in.hasRemaining()) {
                    return fail(CoderResult.UNDERFLOW);
                }
                // The buffer is advanced here even if d turns out not to be
                // a low surrogate, as documented above.
                char d = in.get();
                if (isLow(d)) {
                    return accept(toUCS4(c, d), true);
                }
                return fail(CoderResult.malformedForLength(1));
            }
            if (isLow(c)) {
                // A low surrogate with no preceding high surrogate.
                return fail(CoderResult.malformedForLength(1));
            }
            return accept(c, false);
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * @param  c    The first character
         * @param  ia   The input array, from which one more character
         *              will be consumed if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            if (isHigh(c)) {
                if (il - ip < 2) {
                    return fail(CoderResult.UNDERFLOW);
                }
                char d = ia[ip + 1];
                if (isLow(d)) {
                    return accept(toUCS4(c, d), true);
                }
                return fail(CoderResult.malformedForLength(1));
            }
            if (isLow(c)) {
                // A low surrogate with no preceding high surrogate.
                return fail(CoderResult.malformedForLength(1));
            }
            return accept(c, false);
        }

        // Records a successful parse and returns the parsed character.
        private int accept(int ucs4, boolean pair) {
            character = ucs4;
            isPair = pair;
            error = null;
            return ucs4;
        }

        // Records a failed parse and returns the -1 failure marker.
        private int fail(CoderResult result) {
            error = result;
            return -1;
        }

    }

    /**
     * Surrogate generation support.  Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     *
     * <p>An instance keeps the outcome of the most recent generation in a
     * plain field and performs no synchronization.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.  After a successful generation this
         * returns <tt>null</tt>.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>A value in the surrogate range is reported as malformed; a value
         * that is negative or greater than {@link Surrogate#UCS4_MAX} is
         * reported as unmappable.  Nothing is written in either case.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            int n = charsNeeded(uc, len);
            if (n < 0) {
                return -1;
            }
            if (dst.remaining() < n) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            if (n == 1) {
                dst.put((char) uc);
            } else {
                dst.put(high(uc));
                dst.put(low(uc));
            }
            error = null;
            return n;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>A value in the surrogate range is reported as malformed; a value
         * that is negative or greater than {@link Surrogate#UCS4_MAX} is
         * reported as unmappable.  Nothing is written in either case.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            int n = charsNeeded(uc, len);
            if (n < 0) {
                return -1;
            }
            if (dl - dp < n) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            if (n == 1) {
                da[dp] = (char) uc;
            } else {
                da[dp] = high(uc);
                da[dp + 1] = low(uc);
            }
            error = null;
            return n;
        }

        // Validates uc and returns how many UTF-16 characters it needs (1 or
        // 2).  For an invalid value, stores the matching result in error and
        // returns -1.
        private int charsNeeded(int uc, int len) {
            // Negative values must be rejected up front; otherwise the
            // (char) cast in the one-character path would silently truncate
            // them to an unrelated character.
            if (uc < 0 || uc > UCS4_MAX) {
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
            if (uc < UCS4_MIN) {
                if (is(uc)) {
                    // A lone surrogate code point is not a character.
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                return 1;
            }
            return 2;
        }

    }

}