Mon, 04 May 2009 21:10:41 -0700
6658158: Mutable statics in SAAJ (findbugs)
6658163: txw2.DatatypeWriter.BUILDIN is a mutable static (findbugs)
Reviewed-by: darcy
duke@1 | 1 | /* |
tbell@45 | 2 | * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved. |
duke@1 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@1 | 4 | * |
duke@1 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@1 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@1 | 7 | * published by the Free Software Foundation. Sun designates this |
duke@1 | 8 | * particular file as subject to the "Classpath" exception as provided |
duke@1 | 9 | * by Sun in the LICENSE file that accompanied this code. |
duke@1 | 10 | * |
duke@1 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@1 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@1 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@1 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@1 | 15 | * accompanied this code). |
duke@1 | 16 | * |
duke@1 | 17 | * You should have received a copy of the GNU General Public License version |
duke@1 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@1 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@1 | 20 | * |
duke@1 | 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
duke@1 | 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
duke@1 | 23 | * have any questions. |
duke@1 | 24 | */ |
duke@1 | 25 | |
duke@1 | 26 | package com.sun.codemodel.internal.util; |
duke@1 | 27 | |
duke@1 | 28 | import java.nio.CharBuffer; |
duke@1 | 29 | import java.nio.charset.CoderResult; |
duke@1 | 30 | |
duke@1 | 31 | |
/**
 * Utility class for dealing with UTF-16 surrogates.
 *
 * <p>Offers static helpers for classifying UTF-16 code units and converting
 * between surrogate pairs and UCS-4 values, plus two small stateful helpers,
 * {@link Parser} and {@link Generator}, that record a {@link CoderResult}
 * describing the outcome of their most recent operation.
 *
 * @author Mark Reinhold
 * @version 1.11, 03/01/23
 */

class Surrogate {

    // Static utility holder; never instantiated.
    private Surrogate() { }

    // UTF-16 surrogate-character ranges
    //
    public static final char MIN_HIGH = '\uD800';
    public static final char MAX_HIGH = '\uDBFF';
    public static final char MIN_LOW  = '\uDC00';
    public static final char MAX_LOW  = '\uDFFF';
    public static final char MIN = MIN_HIGH;
    public static final char MAX = MAX_LOW;

    // Range of UCS-4 values that need surrogates in UTF-16
    //
    public static final int UCS4_MIN = 0x10000;
    public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

    /**
     * Tells whether or not the given UTF-16 value is a high surrogate.
     *
     * @param  c  The value to test
     * @return {@code true} if c lies in [MIN_HIGH, MAX_HIGH]
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given UTF-16 value is a low surrogate.
     *
     * @param  c  The value to test
     * @return {@code true} if c lies in [MIN_LOW, MAX_LOW]
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given UTF-16 value is a surrogate character.
     *
     * @param  c  The value to test
     * @return {@code true} if c lies in [MIN, MAX]
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16.
     *
     * @param  uc  The UCS-4 value to test
     * @return {@code true} if uc lies in [UCS4_MIN, UCS4_MAX]
     */
    public static boolean neededFor(int uc) {
        return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
    }

    /**
     * Returns the high UTF-16 surrogate for the given UCS-4 character.
     * The argument is not range-checked; only the ten bits above the low ten
     * bits of {@code uc - UCS4_MIN} contribute to the result.
     *
     * @param  uc  The UCS-4 character
     * @return The high surrogate
     */
    public static char high(int uc) {
        return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff));
    }

    /**
     * Returns the low UTF-16 surrogate for the given UCS-4 character.
     * The argument is not range-checked; only the low ten bits of
     * {@code uc - UCS4_MIN} contribute to the result.
     *
     * @param  uc  The UCS-4 character
     * @return The low surrogate
     */
    public static char low(int uc) {
        return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff));
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     * The arguments are not validated; only their low ten bits are used.
     *
     * @param  c  The high surrogate
     * @param  d  The low surrogate
     * @return The combined UCS-4 value
     */
    public static int toUCS4(char c, char d) {
        return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
    }

    /**
     * Surrogate parsing support.  Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p>Instances keep the result of the most recent parse in plain mutable
     * fields.
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.  After a successful parse this is
         * {@code null}.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            return CoderResult.unmappableForLength(isPair ? 2 : 1);
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * <p>When c is a high surrogate and the buffer has a character
         * remaining, that character is read with a relative get and is
         * therefore consumed even if it turns out not to be a low surrogate.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (isHigh(c)) {
                if (!in.hasRemaining()) {
                    // The second half of the pair has not arrived yet
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = in.get();
                if (isLow(d)) {
                    character = toUCS4(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (isLow(c)) {
                // Low surrogate with no preceding high surrogate
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

        /**
         * Parses a UCS-4 character from the given source array, handling
         * surrogates.
         *
         * @param  c    The first character
         * @param  ia   The input array, from which one more character
         *              will be read (at index ip + 1) if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            if (isHigh(c)) {
                if (il - ip < 2) {
                    // The second half of the pair lies beyond the limit
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = ia[ip + 1];
                if (isLow(d)) {
                    character = toUCS4(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (isLow(c)) {
                // Low surrogate with no preceding high surrogate
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

    }

    /**
     * Surrogate generation support.  Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     *
     * <p>Instances keep the result of the most recent generation in a plain
     * mutable field.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.  After a successful generation this
         * is {@code null}.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside the range [0, UCS4_MAX] are reported as
         * unmappable, values in the surrogate range as malformed, and a
         * destination with too little room as overflow.  Nothing is written
         * to the destination when -1 is returned.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            if ((uc < 0) || (uc > UCS4_MAX)) {
                // Not a Unicode code point.  Negative values must be rejected
                // here; otherwise they would be truncated to a char below.
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
            if (uc < UCS4_MIN) {
                // Single UTF-16 unit, unless it is an unpaired surrogate
                if (is(uc)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dst.remaining() < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                dst.put((char)uc);
                error = null;
                return 1;
            }
            // Supplementary character: needs a surrogate pair
            if (dst.remaining() < 2) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            dst.put(high(uc));
            dst.put(low(uc));
            error = null;
            return 2;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside the range [0, UCS4_MAX] are reported as
         * unmappable, values in the surrogate range as malformed, and a
         * destination with too little room as overflow.  Nothing is written
         * to the destination when -1 is returned.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            if ((uc < 0) || (uc > UCS4_MAX)) {
                // Not a Unicode code point.  Negative values must be rejected
                // here; otherwise they would be truncated to a char below.
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
            if (uc < UCS4_MIN) {
                // Single UTF-16 unit, unless it is an unpaired surrogate
                if (is(uc)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dl - dp < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                da[dp] = (char)uc;
                error = null;
                return 1;
            }
            // Supplementary character: needs a surrogate pair
            if (dl - dp < 2) {
                error = CoderResult.OVERFLOW;
                return -1;
            }
            da[dp] = high(uc);
            da[dp + 1] = low(uc);
            error = null;
            return 2;
        }

    }

}