src/share/classes/com/sun/codemodel/internal/util/Surrogate.java

Mon, 04 May 2009 21:10:41 -0700

author
tbell
date
Mon, 04 May 2009 21:10:41 -0700
changeset 50
42dfec6871f6
parent 45
31822b475baa
permissions
-rw-r--r--

6658158: Mutable statics in SAAJ (findbugs)
6658163: txw2.DatatypeWriter.BUILDIN is a mutable static (findbugs)
Reviewed-by: darcy

     1 /*
     2  * Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Sun designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Sun in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
    23  * have any questions.
    24  */
    26 package com.sun.codemodel.internal.util;
    28 import java.nio.CharBuffer;
    29 import java.nio.charset.CoderResult;
    32 /**
    33  * Utility class for dealing with surrogates.
    34  *
    35  * @author Mark Reinhold
    36  * @version 1.11, 03/01/23
    37  */
    class Surrogate {

        // Static utility holder (plus nested helper classes); never instantiated.
        private Surrogate() { }

        // UTF-16 surrogate-character ranges
        //
        public static final char MIN_HIGH = '\uD800';
        public static final char MAX_HIGH = '\uDBFF';
        public static final char MIN_LOW  = '\uDC00';
        public static final char MAX_LOW  = '\uDFFF';
        public static final char MIN = MIN_HIGH;
        public static final char MAX = MAX_LOW;

        // Range of UCS-4 values that need surrogates in UTF-16
        // (0x10000 through 0x10FFFF inclusive)
        //
        public static final int UCS4_MIN = 0x10000;
        public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;

        /**
         * Tells whether or not the given UTF-16 value is a high surrogate.
         *
         * @param  c  The value to test
         * @return  true if c lies in the range MIN_HIGH..MAX_HIGH inclusive
         */
        public static boolean isHigh(int c) {
            return (MIN_HIGH <= c) && (c <= MAX_HIGH);
        }

        /**
         * Tells whether or not the given UTF-16 value is a low surrogate.
         *
         * @param  c  The value to test
         * @return  true if c lies in the range MIN_LOW..MAX_LOW inclusive
         */
        public static boolean isLow(int c) {
            return (MIN_LOW <= c) && (c <= MAX_LOW);
        }

        /**
         * Tells whether or not the given UTF-16 value is a surrogate character.
         *
         * @param  c  The value to test
         * @return  true if c lies in the range MIN..MAX inclusive
         */
        public static boolean is(int c) {
            return (MIN <= c) && (c <= MAX);
        }

        /**
         * Tells whether or not the given UCS-4 character must be represented as a
         * surrogate pair in UTF-16.
         *
         * @param  uc  The UCS-4 value to test
         * @return  true if uc lies in the range UCS4_MIN..UCS4_MAX inclusive
         */
        public static boolean neededFor(int uc) {
            return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
        }

        /**
         * Returns the high UTF-16 surrogate for the given UCS-4 character.
         * The argument is not validated; only the ten bits above the low ten
         * bits of (uc - UCS4_MIN) are used.
         *
         * @param  uc  The UCS-4 character
         * @return  The high (leading) surrogate
         */
        public static char high(int uc) {
            return (char)(MIN_HIGH | (((uc - UCS4_MIN) >> 10) & 0x3ff));
        }

        /**
         * Returns the low UTF-16 surrogate for the given UCS-4 character.
         * The argument is not validated; only the low ten bits of
         * (uc - UCS4_MIN) are used.
         *
         * @param  uc  The UCS-4 character
         * @return  The low (trailing) surrogate
         */
        public static char low(int uc) {
            return (char)(MIN_LOW | ((uc - UCS4_MIN) & 0x3ff));
        }

        /**
         * Converts the given surrogate pair into a 32-bit UCS-4 character.
         * The arguments are not validated; only the low ten bits of each are
         * used.
         *
         * @param  c  The high surrogate
         * @param  d  The low surrogate
         * @return  The combined UCS-4 value
         */
        public static int toUCS4(char c, char d) {
            return (((c & 0x3ff) << 10) | (d & 0x3ff)) + UCS4_MIN;
        }

        /**
         * Surrogate parsing support.  Charset implementations may use instances of
         * this class to handle the details of parsing UTF-16 surrogate pairs.
         *
         * <p> Instances carry the result of the most recent parse in mutable
         * fields.  A failed parse updates only error(); character() and isPair()
         * keep the values left by the last successful parse. </p>
         */
        public static class Parser {

            public Parser() { }

            private int character;          // UCS-4
            // Starts out as UNDERFLOW; set to null by every successful parse
            private CoderResult error = CoderResult.UNDERFLOW;
            private boolean isPair;

            /**
             * Returns the UCS-4 character previously parsed.
             */
            public int character() {
                return character;
            }

            /**
             * Tells whether or not the previously-parsed UCS-4 character was
             * originally represented by a surrogate pair.
             */
            public boolean isPair() {
                return isPair;
            }

            /**
             * Returns the number of UTF-16 characters consumed by the previous
             * parse: 2 for a surrogate pair, otherwise 1.
             */
            public int increment() {
                return isPair ? 2 : 1;
            }

            /**
             * If the previous parse operation detected an error, return the object
             * describing that error; after a successful parse this is null.
             */
            public CoderResult error() {
                return error;
            }

            /**
             * Returns an unmappable-input result object, with the appropriate
             * input length, for the previously-parsed character.
             */
            public CoderResult unmappableResult() {
                return CoderResult.unmappableForLength(isPair ? 2 : 1);
            }

            /**
             * Parses a UCS-4 character from the given source buffer, handling
             * surrogates.
             *
             * <p> When c is a high surrogate and the buffer has a remaining
             * character, that character is read with a relative get and is
             * therefore consumed even if it turns out not to be a low
             * surrogate. </p>
             *
             * @param  c    The first character
             * @param  in   The source buffer, from which one more character
             *              will be consumed if c is a high surrogate
             *
             * @return   Either a parsed UCS-4 character, in which case the isPair()
             *           and increment() methods will return meaningful values, or
             *           -1, in which case error() will return a descriptive result
             *           object
             */
            public int parse(char c, CharBuffer in) {
                if (isHigh(c)) {
                    if (!in.hasRemaining()) {
                        // The second half of the pair has not arrived yet
                        error = CoderResult.UNDERFLOW;
                        return -1;
                    }
                    char d = in.get();
                    if (isLow(d)) {
                        character = toUCS4(c, d);
                        isPair = true;
                        error = null;
                        return character;
                    }
                    // High surrogate not followed by a low surrogate
                    error = CoderResult.malformedForLength(1);
                    return -1;
                }
                if (isLow(c)) {
                    // Low surrogate without a preceding high surrogate
                    error = CoderResult.malformedForLength(1);
                    return -1;
                }
                character = c;
                isPair = false;
                error = null;
                return character;
            }

            /**
             * Parses a UCS-4 character from the given source array, handling
             * surrogates.
             *
             * @param  c    The first character
             * @param  ia   The input array, from which the character at index
             *              ip + 1 will be read if c is a high surrogate
             * @param  ip   The input index
             * @param  il   The input limit
             *
             * @return   Either a parsed UCS-4 character, in which case the isPair()
             *           and increment() methods will return meaningful values, or
             *           -1, in which case error() will return a descriptive result
             *           object
             */
            public int parse(char c, char[] ia, int ip, int il) {
                if (isHigh(c)) {
                    if (il - ip < 2) {
                        // No room for a second character before the limit
                        error = CoderResult.UNDERFLOW;
                        return -1;
                    }
                    char d = ia[ip + 1];
                    if (isLow(d)) {
                        character = toUCS4(c, d);
                        isPair = true;
                        error = null;
                        return character;
                    }
                    // High surrogate not followed by a low surrogate
                    error = CoderResult.malformedForLength(1);
                    return -1;
                }
                if (isLow(c)) {
                    // Low surrogate without a preceding high surrogate
                    error = CoderResult.malformedForLength(1);
                    return -1;
                }
                character = c;
                isPair = false;
                error = null;
                return character;
            }

        }

        /**
         * Surrogate generation support.  Charset implementations may use instances
         * of this class to handle the details of generating UTF-16 surrogate
         * pairs.
         */
        public static class Generator {

            public Generator() { }

            // Starts out as OVERFLOW; set to null by every successful generate
            private CoderResult error = CoderResult.OVERFLOW;

            /**
             * If the previous generation operation detected an error, return the
             * object describing that error; after a successful generation this
             * is null.
             */
            public CoderResult error() {
                return error;
            }

            /**
             * Generates one or two UTF-16 characters to represent the given UCS-4
             * character.
             *
             * <p> Values outside the range 0..UCS4_MAX (including negative
             * values) are reported as unmappable; values in the surrogate range
             * are reported as malformed.  Nothing is written to the destination
             * when an error is reported. </p>
             *
             * @param  uc   The UCS-4 character
             * @param  len  The number of input bytes from which the UCS-4 value
             *              was constructed (used when creating result objects)
             * @param  dst  The destination buffer, to which one or two UTF-16
             *              characters will be written
             *
             * @return   Either a positive count of the number of UTF-16 characters
             *           written to the destination buffer, or -1, in which case
             *           error() will return a descriptive result object
             */
            public int generate(int uc, int len, CharBuffer dst) {
                if ((uc < 0) || (uc > UCS4_MAX)) {
                    // Negative values must be rejected here: a bare
                    // "uc <= 0xffff" test would accept them and the cast to
                    // char would silently emit a truncated character.
                    error = CoderResult.unmappableForLength(len);
                    return -1;
                }
                if (uc < UCS4_MIN) {
                    // Fits in a single UTF-16 character
                    if (is(uc)) {
                        error = CoderResult.malformedForLength(len);
                        return -1;
                    }
                    if (dst.remaining() < 1) {
                        error = CoderResult.OVERFLOW;
                        return -1;
                    }
                    dst.put((char)uc);
                    error = null;
                    return 1;
                }
                // UCS4_MIN <= uc <= UCS4_MAX: needs a surrogate pair
                if (dst.remaining() < 2) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                dst.put(high(uc));
                dst.put(low(uc));
                error = null;
                return 2;
            }

            /**
             * Generates one or two UTF-16 characters to represent the given UCS-4
             * character.
             *
             * <p> Values outside the range 0..UCS4_MAX (including negative
             * values) are reported as unmappable; values in the surrogate range
             * are reported as malformed.  Nothing is written to the destination
             * when an error is reported. </p>
             *
             * @param  uc   The UCS-4 character
             * @param  len  The number of input bytes from which the UCS-4 value
             *              was constructed (used when creating result objects)
             * @param  da   The destination array, to which one or two UTF-16
             *              characters will be written
             * @param  dp   The destination position
             * @param  dl   The destination limit
             *
             * @return   Either a positive count of the number of UTF-16 characters
             *           written to the destination buffer, or -1, in which case
             *           error() will return a descriptive result object
             */
            public int generate(int uc, int len, char[] da, int dp, int dl) {
                if ((uc < 0) || (uc > UCS4_MAX)) {
                    // Negative values must be rejected here: a bare
                    // "uc <= 0xffff" test would accept them and the cast to
                    // char would silently emit a truncated character.
                    error = CoderResult.unmappableForLength(len);
                    return -1;
                }
                if (uc < UCS4_MIN) {
                    // Fits in a single UTF-16 character
                    if (is(uc)) {
                        error = CoderResult.malformedForLength(len);
                        return -1;
                    }
                    if (dl - dp < 1) {
                        error = CoderResult.OVERFLOW;
                        return -1;
                    }
                    da[dp] = (char)uc;
                    error = null;
                    return 1;
                }
                // UCS4_MIN <= uc <= UCS4_MAX: needs a surrogate pair
                if (dl - dp < 2) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                da[dp] = high(uc);
                da[dp + 1] = low(uc);
                error = null;
                return 2;
            }

        }

    }

mercurial