src/share/classes/com/sun/codemodel/internal/util/Surrogate.java

changeset 1
0961a4a21176
child 45
31822b475baa
equal deleted inserted replaced
-1:000000000000 1:0961a4a21176
1 /*
2 * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26 package com.sun.codemodel.internal.util;
27
28 import java.nio.CharBuffer;
29 import java.nio.charset.CoderResult;
30
31
/**
 * Utility class for dealing with UTF-16 surrogates.
 *
 * <p>Provides the surrogate range constants, predicates and conversions
 * between UCS-4 code points and surrogate pairs, plus two small stateful
 * helpers: {@link Parser} (UTF-16 to UCS-4) and {@link Generator}
 * (UCS-4 to UTF-16).
 *
 * @author Mark Reinhold
 */
class Surrogate {

    private Surrogate() { }

    // UTF-16 surrogate-character ranges
    //
    public static final char MIN_HIGH = '\uD800';
    public static final char MAX_HIGH = '\uDBFF';
    public static final char MIN_LOW = '\uDC00';
    public static final char MAX_LOW = '\uDFFF';
    public static final char MIN = MIN_HIGH;
    public static final char MAX = MAX_LOW;

    // Range of UCS-4 values that need surrogates in UTF-16
    //
    public static final int UCS4_MIN = 0x10000;
    public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

    /**
     * Tells whether or not the given UTF-16 value is a high surrogate.
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given UTF-16 value is a low surrogate.
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given UTF-16 value is a surrogate character
     * (either high or low).
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16, i.e. lies in
     * [{@link #UCS4_MIN}, {@link #UCS4_MAX}].
     */
    public static boolean neededFor(int uc) {
        return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
    }

    /**
     * Returns the high UTF-16 surrogate for the given UCS-4 character.
     * The result is only meaningful when {@link #neededFor(int)} is true
     * for {@code uc}; no range check is performed here.
     */
    public static char high(int uc) {
        // Upper 10 bits of the 20-bit offset above UCS4_MIN.
        return (char) (MIN_HIGH | (((uc - UCS4_MIN) >> 10) & 0x3ff));
    }

    /**
     * Returns the low UTF-16 surrogate for the given UCS-4 character.
     * The result is only meaningful when {@link #neededFor(int)} is true
     * for {@code uc}; no range check is performed here.
     */
    public static char low(int uc) {
        // Lower 10 bits of the 20-bit offset above UCS4_MIN.
        return (char) (MIN_LOW | ((uc - UCS4_MIN) & 0x3ff));
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     * The arguments are not validated; only their low 10 bits are used.
     *
     * @param c  The high surrogate
     * @param d  The low surrogate
     */
    public static int toUCS4(char c, char d) {
        return (((c & 0x3ff) << 10) | (d & 0x3ff)) + UCS4_MIN;
    }

    /**
     * Surrogate parsing support.  Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p>Instances keep the result of the last parse in mutable fields.
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.  After a successful parse this is
         * {@code null}.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            return CoderResult.unmappableForLength(increment());
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * <p>When {@code c} is a high surrogate and the buffer has input
         * remaining, the next character is read (and therefore consumed)
         * from the buffer even if it turns out not to be a low surrogate.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (!isHigh(c)) {
                return acceptSingle(c);
            }
            if (!in.hasRemaining()) {
                return fail(CoderResult.UNDERFLOW);
            }
            return acceptPair(c, in.get());
        }

        /**
         * Parses a UCS-4 character from the given source array, handling
         * surrogates.
         *
         * @param  c    The first character
         * @param  ia   The input array, from which one more character
         *              will be consumed if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            if (!isHigh(c)) {
                return acceptSingle(c);
            }
            if (il - ip < 2) {
                return fail(CoderResult.UNDERFLOW);
            }
            return acceptPair(c, ia[ip + 1]);
        }

        /** Records the given error result and returns the -1 failure marker. */
        private int fail(CoderResult result) {
            error = result;
            return -1;
        }

        /** Records a successfully parsed character and returns it. */
        private int succeed(int ucs4, boolean pair) {
            character = ucs4;
            isPair = pair;
            error = null;
            return ucs4;
        }

        /** Handles a first character that is not a high surrogate. */
        private int acceptSingle(char c) {
            if (isLow(c)) {
                // A low surrogate with no preceding high surrogate.
                return fail(CoderResult.malformedForLength(1));
            }
            return succeed(c, false);
        }

        /** Combines a high surrogate with the character that follows it. */
        private int acceptPair(char hi, char next) {
            if (!isLow(next)) {
                // Only the unpaired high surrogate is reported as malformed.
                return fail(CoderResult.malformedForLength(1));
            }
            return succeed(toUCS4(hi, next), true);
        }

    }

    /**
     * Surrogate generation support.  Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     *
     * <p>Instances keep the result of the last generation in a mutable field.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.  After a successful generation this
         * is {@code null}.
         */
        public CoderResult error() {
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside [0, {@link #UCS4_MAX}] (including negative
         * values) are reported as unmappable, and surrogate values as
         * malformed; nothing is written in either case.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            int count = plan(uc, len, dst.remaining());
            if (count == 1) {
                dst.put((char) uc);
            } else if (count == 2) {
                dst.put(high(uc));
                dst.put(low(uc));
            }
            return count;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p>Values outside [0, {@link #UCS4_MAX}] (including negative
         * values) are reported as unmappable, and surrogate values as
         * malformed; nothing is written in either case.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            int count = plan(uc, len, dl - dp);
            if (count == 1) {
                da[dp] = (char) uc;
            } else if (count == 2) {
                da[dp] = high(uc);
                da[dp + 1] = low(uc);
            }
            return count;
        }

        /**
         * Validates the code point against the available room and updates
         * the error state.  Returns the number of UTF-16 characters to write
         * (1 or 2), or -1 if nothing may be written.
         */
        private int plan(int uc, int len, int room) {
            // Negative values must not fall through to the BMP path, where
            // the (char) cast would silently truncate them.
            if (uc < 0 || uc > UCS4_MAX) {
                return fail(CoderResult.unmappableForLength(len));
            }
            if (is(uc)) {
                // A lone surrogate is not a valid UCS-4 character.
                return fail(CoderResult.malformedForLength(len));
            }
            int needed = (uc < UCS4_MIN) ? 1 : 2;
            if (room < needed) {
                return fail(CoderResult.OVERFLOW);
            }
            error = null;
            return needed;
        }

        /** Records the given error result and returns the -1 failure marker. */
        private int fail(CoderResult result) {
            error = result;
            return -1;
        }

    }

}

mercurial