src/share/classes/com/sun/codemodel/internal/util/Surrogate.java

Mon, 04 May 2009 21:10:41 -0700

author
tbell
date
Mon, 04 May 2009 21:10:41 -0700
changeset 50
42dfec6871f6
parent 45
31822b475baa
permissions
-rw-r--r--

6658158: Mutable statics in SAAJ (findbugs)
6658163: txw2.DatatypeWriter.BUILDIN is a mutable static (findbugs)
Reviewed-by: darcy

duke@1 1 /*
tbell@45 2 * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
duke@1 7 * published by the Free Software Foundation. Sun designates this
duke@1 8 * particular file as subject to the "Classpath" exception as provided
duke@1 9 * by Sun in the LICENSE file that accompanied this code.
duke@1 10 *
duke@1 11 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 14 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 15 * accompanied this code).
duke@1 16 *
duke@1 17 * You should have received a copy of the GNU General Public License version
duke@1 18 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 20 *
duke@1 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@1 22 * CA 95054 USA or visit www.sun.com if you need additional information or
duke@1 23 * have any questions.
duke@1 24 */
duke@1 25
duke@1 26 package com.sun.codemodel.internal.util;
duke@1 27
duke@1 28 import java.nio.CharBuffer;
duke@1 29 import java.nio.charset.CoderResult;
duke@1 30
duke@1 31
/**
 * Utility class for dealing with UTF-16 surrogates.
 *
 * <p>Provides range constants and static predicates/converters for surrogate
 * code units, plus two small stateful helpers: {@link Parser}, which turns one
 * or two UTF-16 code units into a UCS-4 value, and {@link Generator}, which
 * turns a UCS-4 value into one or two UTF-16 code units. Both helpers record
 * the outcome of the most recent operation in mutable fields.
 *
 * @author Mark Reinhold
 * @version 1.11, 03/01/23
 */

class Surrogate {

    private Surrogate() { }

    // UTF-16 surrogate-character ranges
    //
    public static final char MIN_HIGH = '\uD800';
    public static final char MAX_HIGH = '\uDBFF';
    public static final char MIN_LOW = '\uDC00';
    public static final char MAX_LOW = '\uDFFF';
    public static final char MIN = MIN_HIGH;
    public static final char MAX = MAX_LOW;

    // Range of UCS-4 values that need surrogates in UTF-16
    // (0x10000 through 0x10FFFF inclusive)
    //
    public static final int UCS4_MIN = 0x10000;
    public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

    /**
     * Tells whether or not the given UTF-16 value is a high surrogate.
     *
     * @param  c  The value to test
     * @return true if c lies in [MIN_HIGH, MAX_HIGH]
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given UTF-16 value is a low surrogate.
     *
     * @param  c  The value to test
     * @return true if c lies in [MIN_LOW, MAX_LOW]
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given UTF-16 value is a surrogate character
     * (either high or low).
     *
     * @param  c  The value to test
     * @return true if c lies in [MIN, MAX]
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16.
     *
     * @param  uc  The UCS-4 value to test
     * @return true if uc lies in [UCS4_MIN, UCS4_MAX]
     */
    public static boolean neededFor(int uc) {
        return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
    }

    /**
     * Returns the high UTF-16 surrogate for the given UCS-4 character.
     * No range check is performed; the result is only meaningful when
     * {@link #neededFor(int)} is true for uc.
     */
    public static char high(int uc) {
        return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff));
    }

    /**
     * Returns the low UTF-16 surrogate for the given UCS-4 character.
     * No range check is performed; the result is only meaningful when
     * {@link #neededFor(int)} is true for uc.
     */
    public static char low(int uc) {
        return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff));
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     * Only the low ten bits of each argument are used; the arguments are not
     * checked for actually being surrogates.
     *
     * @param  c  The high surrogate
     * @param  d  The low surrogate
     */
    public static int toUCS4(char c, char d) {
        return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
    }

    /**
     * Surrogate parsing support.  Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p>After a failed parse, {@link #character()}, {@link #isPair()} and
     * {@link #increment()} still reflect the most recent successful parse
     * (or the field defaults if there has been none).
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.  Returns null after a successful parse.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            return CoderResult.unmappableForLength(isPair ? 2 : 1);
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * <p>When c is a high surrogate and the buffer is not empty, the next
         * character is always read from the buffer with a relative get, so it
         * is consumed even if it turns out not to be a low surrogate.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (isHigh(c)) {
                if (!in.hasRemaining()) {
                    return fail(CoderResult.UNDERFLOW);
                }
                return pair(c, in.get());
            }
            return single(c);
        }

        /**
         * Parses a UCS-4 character from the given source array, handling
         * surrogates.  The array is only read at index ip + 1, and only when
         * c is a high surrogate and at least two characters are available.
         *
         * @param  c    The first character
         * @param  ia   The input array, from which one more character
         *              will be consumed if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            if (isHigh(c)) {
                if (il - ip < 2) {
                    return fail(CoderResult.UNDERFLOW);
                }
                return pair(c, ia[ip + 1]);
            }
            return single(c);
        }

        // Completes a parse whose first unit c is a high surrogate and whose
        // second unit is d: succeeds only if d is a low surrogate.
        private int pair(char c, char d) {
            if (!isLow(d)) {
                return fail(CoderResult.malformedForLength(1));
            }
            return accept(toUCS4(c, d), true);
        }

        // Completes a parse whose first unit c is not a high surrogate: a
        // lone low surrogate is malformed, anything else stands for itself.
        private int single(char c) {
            if (isLow(c)) {
                return fail(CoderResult.malformedForLength(1));
            }
            return accept(c, false);
        }

        // Records a successful parse and returns the parsed character.
        private int accept(int ucs4, boolean fromPair) {
            character = ucs4;
            isPair = fromPair;
            error = null;
            return ucs4;
        }

        // Records a failed parse and returns the -1 failure marker.
        private int fail(CoderResult result) {
            error = result;
            return -1;
        }

    }

    /**
     * Surrogate generation support.  Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.  Returns null after a successful
         * generation.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside [0, UCS4_MAX] (including negative values) are
         * reported as unmappable; values in the surrogate range are reported
         * as malformed.  Nothing is written to the buffer on failure.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            // Reject negatives up front: they would otherwise pass the BMP
            // test and be silently truncated by the (char) cast.
            if (uc < 0 || uc > UCS4_MAX) {
                return fail(CoderResult.unmappableForLength(len));
            }
            if (uc < UCS4_MIN) {
                if (is(uc)) {
                    return fail(CoderResult.malformedForLength(len));
                }
                if (dst.remaining() < 1) {
                    return fail(CoderResult.OVERFLOW);
                }
                dst.put((char)uc);
                error = null;
                return 1;
            }
            if (dst.remaining() < 2) {
                return fail(CoderResult.OVERFLOW);
            }
            dst.put(high(uc));
            dst.put(low(uc));
            error = null;
            return 2;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside [0, UCS4_MAX] (including negative values) are
         * reported as unmappable; values in the surrogate range are reported
         * as malformed.  Nothing is written to the array on failure.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            // Reject negatives up front: they would otherwise pass the BMP
            // test and be silently truncated by the (char) cast.
            if (uc < 0 || uc > UCS4_MAX) {
                return fail(CoderResult.unmappableForLength(len));
            }
            if (uc < UCS4_MIN) {
                if (is(uc)) {
                    return fail(CoderResult.malformedForLength(len));
                }
                if (dl - dp < 1) {
                    return fail(CoderResult.OVERFLOW);
                }
                da[dp] = (char)uc;
                error = null;
                return 1;
            }
            if (dl - dp < 2) {
                return fail(CoderResult.OVERFLOW);
            }
            da[dp] = high(uc);
            da[dp + 1] = low(uc);
            error = null;
            return 2;
        }

        // Records a failed generation and returns the -1 failure marker.
        private int fail(CoderResult result) {
            error = result;
            return -1;
        }

    }

}

mercurial